diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 75259cb7e2f05..2dafaf277be8f 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -182,12 +182,12 @@ class TestDatetime64SeriesComparison: @pytest.mark.parametrize( "op, expected", [ - (operator.eq, Series([False, False, True])), - (operator.ne, Series([True, True, False])), - (operator.lt, Series([False, False, False])), - (operator.gt, Series([False, False, False])), - (operator.ge, Series([False, False, True])), - (operator.le, Series([False, False, True])), + (operator.eq, [False, False, True]), + (operator.ne, [True, True, False]), + (operator.lt, [False, False, False]), + (operator.gt, [False, False, False]), + (operator.ge, [False, False, True]), + (operator.le, [False, False, True]), ], ) def test_nat_comparisons( @@ -210,7 +210,7 @@ def test_nat_comparisons( result = op(left, right) - tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, Series(expected)) @pytest.mark.parametrize( "data", @@ -1485,11 +1485,10 @@ def test_dt64arr_add_sub_DateOffsets( @pytest.mark.parametrize( "other", [ - np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]), - np.array([pd.offsets.DateOffset(years=1), pd.offsets.MonthEnd()]), - np.array( # matching offsets - [pd.offsets.DateOffset(years=1), pd.offsets.DateOffset(years=1)] - ), + [pd.offsets.MonthEnd(), pd.offsets.Day(n=2)], + [pd.offsets.DateOffset(years=1), pd.offsets.MonthEnd()], + # matching offsets + [pd.offsets.DateOffset(years=1), pd.offsets.DateOffset(years=1)], ], ) @pytest.mark.parametrize("op", [operator.add, roperator.radd, operator.sub]) @@ -1502,7 +1501,7 @@ def test_dt64arr_add_sub_offset_array( tz = tz_naive_fixture dti = date_range("2017-01-01", periods=2, tz=tz) dtarr = tm.box_expected(dti, box_with_array) - + other = np.array(other) expected = DatetimeIndex([op(dti[n], other[n]) for n in range(len(dti))]) expected = tm.box_expected(expected, box_with_array).astype(object) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index b2007209dd5b9..3e9508bd2f504 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1960,19 +1960,20 @@ def test_td64arr_floordiv_numeric_scalar(self, box_with_array, two): two // tdser @pytest.mark.parametrize( - "vector", - [np.array([20, 30, 40]), Index([20, 30, 40]), Series([20, 30, 40])], - ids=lambda x: type(x).__name__, + "klass", + [np.array, Index, Series], + ids=lambda x: x.__name__, ) def test_td64arr_rmul_numeric_array( self, box_with_array, - vector, + klass, any_real_numpy_dtype, ): # GH#4521 # divide/multiply by integers + vector = klass([20, 30, 40]) tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") vector = vector.astype(any_real_numpy_dtype) @@ -1990,16 +1991,17 @@ def test_td64arr_rmul_numeric_array( tm.assert_equal(result, expected) @pytest.mark.parametrize( - "vector", - [np.array([20, 30, 40]), Index([20, 30, 40]), Series([20, 30, 40])], - ids=lambda x: type(x).__name__, + "klass", + [np.array, Index, Series], + ids=lambda x: x.__name__, ) def test_td64arr_div_numeric_array( - self, box_with_array, vector, any_real_numpy_dtype + self, box_with_array, klass, any_real_numpy_dtype ): # GH#4521 # divide/multiply by integers + vector = klass([20, 30, 40]) tdser = Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") vector = vector.astype(any_real_numpy_dtype) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 5e1c5c64fa660..33c55b2090bd6 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -51,12 +51,10 @@ def test_setitem(self): tm.assert_categorical_equal(c, expected) - @pytest.mark.parametrize( - "other", - [Categorical(["b", "a"]), Categorical(["b", "a"], categories=["b", "a"])], - ) - def test_setitem_same_but_unordered(self, other): + @pytest.mark.parametrize("categories", [None, ["b", "a"]]) + def test_setitem_same_but_unordered(self, categories): # GH-24142 + other = Categorical(["b", "a"], categories=categories) target = Categorical(["a", "b"], categories=["a", "b"]) mask = np.array([True, False]) target[mask] = other[mask] diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 4174d2adc810b..8778df832d4d7 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -307,29 +307,23 @@ def test_comparisons(self, data, reverse, base): with pytest.raises(TypeError, match=msg): a < cat_rev - @pytest.mark.parametrize( - "ctor", - [ - lambda *args, **kwargs: Categorical(*args, **kwargs), - lambda *args, **kwargs: Series(Categorical(*args, **kwargs)), - ], - ) - def test_unordered_different_order_equal(self, ctor): + @pytest.mark.parametrize("box", [lambda x: x, Series]) + def test_unordered_different_order_equal(self, box): # https://github.com/pandas-dev/pandas/issues/16014 - c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False) - c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False) + c1 = box(Categorical(["a", "b"], categories=["a", "b"], ordered=False)) + c2 = box(Categorical(["a", "b"], categories=["b", "a"], ordered=False)) assert (c1 == c2).all() - c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False) - c2 = ctor(["b", "a"], categories=["b", "a"], ordered=False) + c1 = box(Categorical(["a", "b"], categories=["a", "b"], ordered=False)) + c2 = box(Categorical(["b", "a"], categories=["b", "a"], ordered=False)) assert (c1 != c2).all() - c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False) - c2 = ctor(["b", "b"], categories=["b", "a"], ordered=False) + c1 = box(Categorical(["a", "a"], categories=["a", "b"], ordered=False)) + c2 = box(Categorical(["b", "b"], categories=["b", "a"], ordered=False)) assert (c1 != c2).all() - c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False) - c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False) + c1 = box(Categorical(["a", "a"], categories=["a", "b"], ordered=False)) + c2 = box(Categorical(["a", "b"], categories=["b", "a"], ordered=False)) result = c1 == c2 tm.assert_numpy_array_equal(np.array(result), np.array([True, False])) diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index 234f4092421e5..6fcbfe96a3df7 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -99,15 +99,15 @@ def test_construct_from_string_raises(): @pytest.mark.parametrize( "dtype, expected", [ - (SparseDtype(int), True), - (SparseDtype(float), True), - (SparseDtype(bool), True), - (SparseDtype(object), False), - (SparseDtype(str), False), + (int, True), + (float, True), + (bool, True), + (object, False), + (str, False), ], ) def test_is_numeric(dtype, expected): - assert dtype._is_numeric is expected + assert SparseDtype(dtype)._is_numeric is expected def test_str_uses_object(): diff --git a/pandas/tests/arrays/sparse/test_reductions.py b/pandas/tests/arrays/sparse/test_reductions.py index f44423d5e635c..4171d1213a0dc 100644 --- a/pandas/tests/arrays/sparse/test_reductions.py +++ b/pandas/tests/arrays/sparse/test_reductions.py @@ -126,13 +126,13 @@ def test_sum(self): @pytest.mark.parametrize( "arr", - [np.array([0, 1, np.nan, 1]), np.array([0, 1, 1])], + [[0, 1, np.nan, 1], [0, 1, 1]], ) @pytest.mark.parametrize("fill_value", [0, 1, np.nan]) @pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)]) def test_sum_min_count(self, arr, fill_value, min_count, expected): # GH#25777 - sparray = SparseArray(arr, fill_value=fill_value) + sparray = SparseArray(np.array(arr), fill_value=fill_value) result = sparray.sum(min_count=min_count) if np.isnan(expected): assert np.isnan(result) @@ -296,11 +296,9 @@ def test_argmax_argmin(self, arr, argmax_expected, argmin_expected): assert argmax_result == argmax_expected assert argmin_result == argmin_expected - @pytest.mark.parametrize( - "arr,method", - [(SparseArray([]), "argmax"), (SparseArray([]), "argmin")], - ) - def test_empty_array(self, arr, method): + @pytest.mark.parametrize("method", ["argmax", "argmin"]) + def test_empty_array(self, method): msg = f"attempt to get {method} of an empty sequence" + arr = SparseArray([]) with pytest.raises(ValueError, match=msg): - arr.argmax() if method == "argmax" else arr.argmin() + getattr(arr, method)() diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index fe0f1f1454a55..ad35742a7b337 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -499,22 +499,23 @@ def test_to_numpy_dataframe_na_value(data, dtype, na_value): @pytest.mark.parametrize( - "data, expected", + "data, expected_data", [ ( {"a": pd.array([1, 2, None])}, - np.array([[1.0], [2.0], [np.nan]], dtype=float), + [[1.0], [2.0], [np.nan]], ), ( {"a": [1, 2, 3], "b": [1, 2, 3]}, - np.array([[1, 1], [2, 2], [3, 3]], dtype=float), + [[1, 1], [2, 2], [3, 3]], ), ], ) -def test_to_numpy_dataframe_single_block(data, expected): +def test_to_numpy_dataframe_single_block(data, expected_data): # https://github.com/pandas-dev/pandas/issues/33820 df = pd.DataFrame(data) result = df.to_numpy(dtype=float, na_value=np.nan) + expected = np.array(expected_data, dtype=float) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 7969e684f5b04..b69fb573987f9 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -522,14 +522,15 @@ def test_series_negate(self, engine, parser): "lhs", [ # Float - DataFrame(np.random.default_rng(2).standard_normal((5, 2))), + np.random.default_rng(2).standard_normal((5, 2)), # Int - DataFrame(np.random.default_rng(2).integers(5, size=(5, 2))), + np.random.default_rng(2).integers(5, size=(5, 2)), # bool doesn't work with numexpr but works elsewhere - DataFrame(np.random.default_rng(2).standard_normal((5, 2)) > 0.5), + np.array([True, False, True, False, True], dtype=np.bool_), ], ) def test_frame_pos(self, lhs, engine, parser): + lhs = DataFrame(lhs) expr = "+lhs" expect = lhs @@ -540,14 +541,15 @@ def test_frame_pos(self, lhs, engine, parser): "lhs", [ # Float - Series(np.random.default_rng(2).standard_normal(5)), + np.random.default_rng(2).standard_normal(5), # Int - Series(np.random.default_rng(2).integers(5, size=5)), + np.random.default_rng(2).integers(5, size=5), # bool doesn't work with numexpr but works elsewhere - Series(np.random.default_rng(2).standard_normal(5) > 0.5), + np.array([True, False, True, False, True], dtype=np.bool_), ], ) def test_series_pos(self, lhs, engine, parser): + lhs = Series(lhs) expr = "+lhs" expect = lhs diff --git a/pandas/tests/copy_view/index/test_datetimeindex.py b/pandas/tests/copy_view/index/test_datetimeindex.py index b023297c9549d..5dd1f45a94ff3 100644 --- a/pandas/tests/copy_view/index/test_datetimeindex.py +++ b/pandas/tests/copy_view/index/test_datetimeindex.py @@ -13,17 +13,11 @@ ) -@pytest.mark.parametrize( - "cons", - [ - lambda x: DatetimeIndex(x), - lambda x: DatetimeIndex(DatetimeIndex(x)), - ], -) -def test_datetimeindex(using_copy_on_write, cons): +@pytest.mark.parametrize("box", [lambda x: x, DatetimeIndex]) +def test_datetimeindex(using_copy_on_write, box): dt = date_range("2019-12-31", periods=3, freq="D") ser = Series(dt) - idx = cons(ser) + idx = box(DatetimeIndex(ser)) expected = idx.copy(deep=True) ser.iloc[0] = Timestamp("2020-12-31") if using_copy_on_write: diff --git a/pandas/tests/copy_view/index/test_periodindex.py b/pandas/tests/copy_view/index/test_periodindex.py index b80ce1d3d838f..753304a1a8963 100644 --- a/pandas/tests/copy_view/index/test_periodindex.py +++ b/pandas/tests/copy_view/index/test_periodindex.py @@ -13,17 +13,11 @@ ) -@pytest.mark.parametrize( - "cons", - [ - lambda x: PeriodIndex(x), - lambda x: PeriodIndex(PeriodIndex(x)), - ], -) -def test_periodindex(using_copy_on_write, cons): +@pytest.mark.parametrize("box", [lambda x: x, PeriodIndex]) +def test_periodindex(using_copy_on_write, box): dt = period_range("2019-12-31", periods=3, freq="D") ser = Series(dt) - idx = cons(ser) + idx = box(PeriodIndex(ser)) expected = idx.copy(deep=True) ser.iloc[0] = Period("2020-12-31") if using_copy_on_write: diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 5eeab778c184c..f1f5cb1620345 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1078,15 +1078,15 @@ def test_integers(self): @pytest.mark.parametrize( "arr, skipna", [ - (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), False), - (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), True), - (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), False), - (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), True), + ([1, 2, np.nan, np.nan, 3], False), + ([1, 2, np.nan, np.nan, 3], True), + ([1, 2, 3, np.int64(4), np.int32(5), np.nan], False), + ([1, 2, 3, np.int64(4), np.int32(5), np.nan], True), ], ) def test_integer_na(self, arr, skipna): # GH 27392 - result = lib.infer_dtype(arr, skipna=skipna) + result = lib.infer_dtype(np.array(arr, dtype="O"), skipna=skipna) expected = "integer" if skipna else "integer-na" assert result == expected @@ -1287,13 +1287,13 @@ def test_infer_dtype_mixed_integer(self): @pytest.mark.parametrize( "arr", [ - np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]), - np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]), - np.array([datetime(2011, 1, 1), Timestamp("2011-01-02")]), + [Timestamp("2011-01-01"), Timestamp("2011-01-02")], + [datetime(2011, 1, 1), datetime(2012, 2, 1)], + [datetime(2011, 1, 1), Timestamp("2011-01-02")], ], ) def test_infer_dtype_datetime(self, arr): - assert lib.infer_dtype(arr, skipna=True) == "datetime" + assert lib.infer_dtype(np.array(arr), skipna=True) == "datetime" @pytest.mark.parametrize("na_value", [pd.NaT, np.nan]) @pytest.mark.parametrize( @@ -1902,14 +1902,15 @@ def test_is_scalar_numpy_array_scalars(self): @pytest.mark.parametrize( "zerodim", [ - np.array(1), - np.array("foobar"), - np.array(np.datetime64("2014-01-01")), - np.array(np.timedelta64(1, "h")), - np.array(np.datetime64("NaT")), + 1, + "foobar", + np.datetime64("2014-01-01"), + np.timedelta64(1, "h"), + np.datetime64("NaT"), ], ) def test_is_scalar_numpy_zerodim_arrays(self, zerodim): + zerodim = np.array(zerodim) assert not is_scalar(zerodim) assert is_scalar(lib.item_from_zerodim(zerodim)) diff --git a/pandas/tests/frame/indexing/test_get.py b/pandas/tests/frame/indexing/test_get.py index 5f2651eec683c..75bad0ec1f159 100644 --- a/pandas/tests/frame/indexing/test_get.py +++ b/pandas/tests/frame/indexing/test_get.py @@ -15,13 +15,13 @@ def test_get(self, float_frame): ) @pytest.mark.parametrize( - "df", + "columns, index", [ - DataFrame(), - DataFrame(columns=list("AB")), - DataFrame(columns=list("AB"), index=range(3)), + [None, None], + [list("AB"), None], + [list("AB"), range(3)], ], ) - def test_get_none(self, df): + def test_get_none(self, columns, index): # see gh-5652 - assert df.get(None) is None + assert DataFrame(columns=columns, index=index).get(None) is None diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 06cd51b43a0aa..a3ae3991522c2 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -232,15 +232,13 @@ def test_drop_api_equivalence(self): with pytest.raises(ValueError, match=msg): df.drop(axis=1) - data = [[1, 2, 3], [1, 2, 3]] - @pytest.mark.parametrize( "actual", [ - DataFrame(data=data, index=["a", "a"]), - DataFrame(data=data, index=["a", "b"]), - DataFrame(data=data, index=["a", "b"]).set_index([0, 1]), - DataFrame(data=data, index=["a", "a"]).set_index([0, 1]), + DataFrame([[1, 2, 3], [1, 2, 3]], index=["a", "a"]), + DataFrame([[1, 2, 3], [1, 2, 3]], index=["a", "b"]), + DataFrame([[1, 2, 3], [1, 2, 3]], index=["a", "b"]).set_index([0, 1]), + DataFrame([[1, 2, 3], [1, 2, 3]], index=["a", "a"]).set_index([0, 1]), ], ) def test_raise_on_drop_duplicate_index(self, actual): diff --git a/pandas/tests/frame/methods/test_filter.py b/pandas/tests/frame/methods/test_filter.py index 382615aaef627..dc84e2adf1239 100644 --- a/pandas/tests/frame/methods/test_filter.py +++ b/pandas/tests/frame/methods/test_filter.py @@ -98,15 +98,16 @@ def test_filter_regex_search(self, float_frame): tm.assert_frame_equal(result, exp) @pytest.mark.parametrize( - "name,expected", + "name,expected_data", [ - ("a", DataFrame({"a": [1, 2]})), - ("あ", DataFrame({"あ": [3, 4]})), + ("a", {"a": [1, 2]}), + ("あ", {"あ": [3, 4]}), ], ) - def test_filter_unicode(self, name, expected): + def test_filter_unicode(self, name, expected_data): # GH13101 df = DataFrame({"a": [1, 2], "あ": [3, 4]}) + expected = DataFrame(expected_data) tm.assert_frame_equal(df.filter(like=name), expected) tm.assert_frame_equal(df.filter(regex=name), expected) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 2a889efe79064..da6d69f36f900 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -1219,13 +1219,7 @@ def test_reindex_empty_frame(self, kwargs): expected = DataFrame({"a": [np.nan] * 3}, index=idx, dtype=object) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "src_idx", - [ - Index([]), - CategoricalIndex([]), - ], - ) + @pytest.mark.parametrize("src_idx", [Index, CategoricalIndex]) @pytest.mark.parametrize( "cat_idx", [ @@ -1240,7 +1234,7 @@ def test_reindex_empty_frame(self, kwargs): ], ) def test_reindex_empty(self, src_idx, cat_idx): - df = DataFrame(columns=src_idx, index=["K"], dtype="f8") + df = DataFrame(columns=src_idx([]), index=["K"], dtype="f8") result = df.reindex(columns=cat_idx) expected = DataFrame(index=["K"], columns=cat_idx, dtype="f8") @@ -1281,36 +1275,14 @@ def test_reindex_datetimelike_to_object(self, dtype): assert res.iloc[-1, 1] is fv tm.assert_frame_equal(res, expected) - @pytest.mark.parametrize( - "index_df,index_res,index_exp", - [ - ( - CategoricalIndex([], categories=["A"]), - Index(["A"]), - Index(["A"]), - ), - ( - CategoricalIndex([], categories=["A"]), - Index(["B"]), - Index(["B"]), - ), - ( - CategoricalIndex([], categories=["A"]), - CategoricalIndex(["A"]), - CategoricalIndex(["A"]), - ), - ( - CategoricalIndex([], categories=["A"]), - CategoricalIndex(["B"]), - CategoricalIndex(["B"]), - ), - ], - ) - def test_reindex_not_category(self, index_df, index_res, index_exp): + @pytest.mark.parametrize("klass", [Index, CategoricalIndex]) + @pytest.mark.parametrize("data", ["A", "B"]) + def test_reindex_not_category(self, klass, data): # GH#28690 - df = DataFrame(index=index_df) - result = df.reindex(index=index_res) - expected = DataFrame(index=index_exp) + df = DataFrame(index=CategoricalIndex([], categories=["A"])) + idx = klass([data]) + result = df.reindex(index=idx) + expected = DataFrame(index=idx) tm.assert_frame_equal(result, expected) def test_invalid_method(self): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d44de380d243a..6d52bf161f4fa 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -234,16 +234,9 @@ def test_empty_constructor(self, constructor): assert len(result.columns) == 0 tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "constructor", - [ - lambda: DataFrame({}), - lambda: DataFrame(data={}), - ], - ) - def test_empty_constructor_object_index(self, constructor): + def test_empty_constructor_object_index(self): expected = DataFrame(index=RangeIndex(0), columns=RangeIndex(0)) - result = constructor() + result = DataFrame({}) assert len(result.index) == 0 assert len(result.columns) == 0 tm.assert_frame_equal(result, expected, check_index_type=True) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 90524861ce311..1d8f50668cee2 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1217,16 +1217,16 @@ def test_stack_preserve_categorical_dtype_values(self, future_stack): ) @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning") @pytest.mark.parametrize( - "index, columns", + "index", [ - ([0, 0, 1, 1], MultiIndex.from_product([[1, 2], ["a", "b"]])), - ([0, 0, 2, 3], MultiIndex.from_product([[1, 2], ["a", "b"]])), - ([0, 1, 2, 3], MultiIndex.from_product([[1, 2], ["a", "b"]])), + [0, 0, 1, 1], + [0, 0, 2, 3], + [0, 1, 2, 3], ], ) - def test_stack_multi_columns_non_unique_index(self, index, columns, future_stack): + def test_stack_multi_columns_non_unique_index(self, index, future_stack): # GH-28301 - + columns = MultiIndex.from_product([[1, 2], ["a", "b"]]) df = DataFrame(index=index, columns=columns).fillna(1) stacked = df.stack(future_stack=future_stack) new_index = MultiIndex.from_tuples(stacked.index.to_numpy()) @@ -1720,11 +1720,10 @@ def test_stack(self, multiindex_year_month_day_dataframe_random_data, future_sta tm.assert_equal(result, expected) @pytest.mark.parametrize( - "idx, columns, exp_idx", + "idx, exp_idx", [ [ list("abab"), - ["1st", "2nd", "1st"], MultiIndex( levels=[["a", "b"], ["1st", "2nd"]], codes=[np.tile(np.arange(2).repeat(3), 2), np.tile([0, 1, 0], 4)], @@ -1732,7 +1731,6 @@ def test_stack(self, multiindex_year_month_day_dataframe_random_data, future_sta ], [ MultiIndex.from_tuples((("a", 2), ("b", 1), ("a", 1), ("b", 2))), - ["1st", "2nd", "1st"], MultiIndex( levels=[["a", "b"], [1, 2], ["1st", "2nd"]], codes=[ @@ -1744,12 +1742,12 @@ def test_stack(self, multiindex_year_month_day_dataframe_random_data, future_sta ], ], ) - def test_stack_duplicate_index(self, idx, columns, exp_idx, future_stack): + def test_stack_duplicate_index(self, idx, exp_idx, future_stack): # GH10417 df = DataFrame( np.arange(12).reshape(4, 3), index=idx, - columns=columns, + columns=["1st", "2nd", "1st"], ) if future_stack: msg = "Columns with duplicate values are not supported in stack" diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py index 850c92013694f..e89175ceff0c1 100644 --- a/pandas/tests/frame/test_unary.py +++ b/pandas/tests/frame/test_unary.py @@ -13,17 +13,16 @@ class TestDataFrameUnaryOperators: # __pos__, __neg__, __invert__ @pytest.mark.parametrize( - "df,expected", + "df_data,expected_data", [ - (pd.DataFrame({"a": [-1, 1]}), pd.DataFrame({"a": [1, -1]})), - (pd.DataFrame({"a": [False, True]}), pd.DataFrame({"a": [True, False]})), - ( - pd.DataFrame({"a": pd.Series(pd.to_timedelta([-1, 1]))}), - pd.DataFrame({"a": pd.Series(pd.to_timedelta([1, -1]))}), - ), + ([-1, 1], [1, -1]), + ([False, True], [True, False]), + (pd.to_timedelta([-1, 1]), pd.to_timedelta([1, -1])), ], ) - def test_neg_numeric(self, df, expected): + def test_neg_numeric(self, df_data, expected_data): + df = pd.DataFrame({"a": df_data}) + expected = pd.DataFrame({"a": expected_data}) tm.assert_frame_equal(-df, expected) tm.assert_series_equal(-df["a"], expected["a"]) @@ -42,13 +41,14 @@ def test_neg_object(self, df, expected): tm.assert_series_equal(-df["a"], expected["a"]) @pytest.mark.parametrize( - "df", + "df_data", [ - pd.DataFrame({"a": ["a", "b"]}), - pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])}), + ["a", "b"], + pd.to_datetime(["2017-01-22", "1970-01-01"]), ], ) - def test_neg_raises(self, df, using_infer_string): + def test_neg_raises(self, df_data, using_infer_string): + df = pd.DataFrame({"a": df_data}) msg = ( "bad operand type for unary -: 'str'|" r"bad operand type for unary -: 'DatetimeArray'" @@ -102,44 +102,36 @@ def test_invert_empty_not_input(self): assert df is not result @pytest.mark.parametrize( - "df", + "df_data", [ - pd.DataFrame({"a": [-1, 1]}), - pd.DataFrame({"a": [False, True]}), - pd.DataFrame({"a": pd.Series(pd.to_timedelta([-1, 1]))}), + [-1, 1], + [False, True], + pd.to_timedelta([-1, 1]), ], ) - def test_pos_numeric(self, df): + def test_pos_numeric(self, df_data): # GH#16073 + df = pd.DataFrame({"a": df_data}) tm.assert_frame_equal(+df, df) tm.assert_series_equal(+df["a"], df["a"]) @pytest.mark.parametrize( - "df", + "df_data", [ - pd.DataFrame({"a": np.array([-1, 2], dtype=object)}), - pd.DataFrame({"a": [Decimal("-1.0"), Decimal("2.0")]}), + np.array([-1, 2], dtype=object), + [Decimal("-1.0"), Decimal("2.0")], ], ) - def test_pos_object(self, df): + def test_pos_object(self, df_data): # GH#21380 + df = pd.DataFrame({"a": df_data}) tm.assert_frame_equal(+df, df) tm.assert_series_equal(+df["a"], df["a"]) - @pytest.mark.parametrize( - "df", - [ - pytest.param( - pd.DataFrame({"a": ["a", "b"]}), - # filterwarnings removable once min numpy version is 1.25 - marks=[ - pytest.mark.filterwarnings("ignore:Applying:DeprecationWarning") - ], - ), - ], - ) - def test_pos_object_raises(self, df): + @pytest.mark.filterwarnings("ignore:Applying:DeprecationWarning") + def test_pos_object_raises(self): # GH#21380 + df = pd.DataFrame({"a": ["a", "b"]}) if np_version_gte1p25: with pytest.raises( TypeError, match=r"^bad operand type for unary \+: \'str\'$" @@ -148,10 +140,8 @@ def test_pos_object_raises(self, df): else: tm.assert_series_equal(+df["a"], df["a"]) - @pytest.mark.parametrize( - "df", [pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])})] - ) - def test_pos_raises(self, df): + def test_pos_raises(self): + df = pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])}) msg = r"bad operand type for unary \+: 'DatetimeArray'" with pytest.raises(TypeError, match=msg): (+df) diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index f54db07824daf..07f76810cbfc8 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -45,12 +45,11 @@ def test_preserved_series(self, func): s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) assert func(s).flags.allows_duplicate_labels is False - @pytest.mark.parametrize( - "other", [pd.Series(0, index=["a", "b", "c"]), pd.Series(0, index=["a", "b"])] - ) + @pytest.mark.parametrize("index", [["a", "b", "c"], ["a", "b"]]) # TODO: frame @not_implemented - def test_align(self, other): + def test_align(self, index): + other = pd.Series(0, index=index) s = pd.Series([0, 1], index=["a", "b"]).set_flags(allows_duplicate_labels=False) a, b = s.align(other) assert a.flags.allows_duplicate_labels is False @@ -298,23 +297,15 @@ def test_getitem_raises(self, getter, target): with pytest.raises(pd.errors.DuplicateLabelError, match=msg): getter(target) - @pytest.mark.parametrize( - "objs, kwargs", - [ - ( - [ - pd.Series(1, index=[0, 1], name="a"), - pd.Series(2, index=[0, 1], name="a"), - ], - {"axis": 1}, - ) - ], - ) - def test_concat_raises(self, objs, kwargs): + def test_concat_raises(self): + objs = [ + pd.Series(1, index=[0, 1], name="a"), + pd.Series(2, index=[0, 1], name="a"), + ] objs = [x.set_flags(allows_duplicate_labels=False) for x in objs] msg = "Index has duplicates." with pytest.raises(pd.errors.DuplicateLabelError, match=msg): - pd.concat(objs, **kwargs) + pd.concat(objs, axis=1) @not_implemented def test_merge_raises(self): diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 0596193c137e1..f5818d95020aa 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -540,46 +540,44 @@ def test_sum_uint64_overflow(): @pytest.mark.parametrize( - "structure, expected", + "structure, cast_as", [ - (tuple, DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})), - (list, DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})), - ( - lambda x: tuple(x), - DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}}), - ), - ( - lambda x: list(x), - DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}}), - ), + (tuple, tuple), + (list, list), + (lambda x: tuple(x), tuple), + (lambda x: list(x), list), ], ) -def test_agg_structs_dataframe(structure, expected): +def test_agg_structs_dataframe(structure, cast_as): df = DataFrame( {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]} ) result = df.groupby(["A", "B"]).aggregate(structure) + expected = DataFrame( + {"C": {(1, 1): cast_as([1, 1, 1]), (3, 4): cast_as([3, 4, 4])}} + ) expected.index.names = ["A", "B"] tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( - "structure, expected", + "structure, cast_as", [ - (tuple, Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")), - (list, Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")), - (lambda x: tuple(x), Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")), - (lambda x: list(x), Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")), + (tuple, tuple), + (list, list), + (lambda x: tuple(x), tuple), + (lambda x: list(x), list), ], ) -def test_agg_structs_series(structure, expected): +def test_agg_structs_series(structure, cast_as): # Issue #18079 df = DataFrame( {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]} ) result = df.groupby("A")["C"].aggregate(structure) + expected = Series([cast_as([1, 1, 1]), cast_as([3, 4, 4])], index=[1, 3], name="C") expected.index.name = "A" tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index ac5374597585a..07d52308e308a 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -41,24 +41,27 @@ def test_mgr_locs_updated(func): "binner,closed,expected", [ ( - np.array([0, 3, 6, 9], dtype=np.int64), + [0, 3, 6, 9], "left", - np.array([2, 5, 6], dtype=np.int64), + [2, 5, 6], ), ( - np.array([0, 3, 6, 9], dtype=np.int64), + [0, 3, 6, 9], "right", - np.array([3, 6, 6], dtype=np.int64), + [3, 6, 6], ), - (np.array([0, 3, 6], dtype=np.int64), "left", np.array([2, 5], dtype=np.int64)), + ([0, 3, 6], "left", [2, 5]), ( - np.array([0, 3, 6], dtype=np.int64), + [0, 3, 6], "right", - np.array([3, 6], dtype=np.int64), + [3, 6], ), ], ) def test_generate_bins(binner, closed, expected): values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) - result = lib.generate_bins_dt64(values, binner, closed=closed) + result = lib.generate_bins_dt64( + values, np.array(binner, dtype=np.int64), closed=closed + ) + expected = np.array(expected, dtype=np.int64) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 038f59f8ea80f..14c5c21d41772 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -350,12 +350,9 @@ def test_basic_regression(): grouped.mean() -@pytest.mark.parametrize( - "dtype", ["float64", "float32", "int64", "int32", "int16", "int8"] -) -def test_with_na_groups(dtype): +def test_with_na_groups(any_real_numpy_dtype): index = Index(np.arange(10)) - values = Series(np.ones(10), index, dtype=dtype) + values = Series(np.ones(10), index, dtype=any_real_numpy_dtype) labels = Series( [np.nan, "foo", "bar", "bar", np.nan, np.nan, "bar", "bar", np.nan, "foo"], index=index,