TST/CLN: Test parametrizations 4 (#56787)

mroeschke · web-flow · commit 52cb549f443f · 2024-01-11T08:56:06.000-08:00
* TST/CLN: Test parametrizations

* Fix signature

* Add object
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
@@ -515,13 +515,11 @@ def test_replace_compiled_regex_callable(any_string_dtype):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.parametrize(
-    "regex,expected", [(True, ["bao", "bao", np.nan]), (False, ["bao", "foo", np.nan])]
-)
-def test_replace_literal(regex, expected, any_string_dtype):
+@pytest.mark.parametrize("regex,expected_val", [(True, "bao"), (False, "foo")])
+def test_replace_literal(regex, expected_val, any_string_dtype):
     # GH16808 literal replace (regex=False vs regex=True)
     ser = Series(["f.o", "foo", np.nan], dtype=any_string_dtype)
-    expected = Series(expected, dtype=any_string_dtype)
+    expected = Series(["bao", expected_val, np.nan], dtype=any_string_dtype)
     result = ser.str.replace("f.", "ba", regex=regex)
     tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py
@@ -190,23 +190,24 @@ def test_split_maxsplit(data, pat, any_string_dtype, n):
 
 
 @pytest.mark.parametrize(
-    "data, pat, expected",
+    "data, pat, expected_val",
     [
         (
             ["split once", "split once too!"],
             None,
-            Series({0: ["split", "once"], 1: ["split", "once too!"]}),
+            "once too!",
         ),
         (
             ["split_once", "split_once_too!"],
             "_",
-            Series({0: ["split", "once"], 1: ["split", "once_too!"]}),
+            "once_too!",
         ),
     ],
 )
-def test_split_no_pat_with_nonzero_n(data, pat, expected, any_string_dtype):
+def test_split_no_pat_with_nonzero_n(data, pat, expected_val, any_string_dtype):
     s = Series(data, dtype=any_string_dtype)
     result = s.str.split(pat=pat, n=1)
+    expected = Series({0: ["split", "once"], 1: ["split", expected_val]})
     tm.assert_series_equal(expected, result, check_index_type=False)
 
 
@@ -533,37 +534,27 @@ def test_partition_series_stdlib(any_string_dtype, method):
 
 
 @pytest.mark.parametrize(
-    "method, expand, exp, exp_levels",
+    "method, exp",
     [
         [
             "partition",
-            False,
-            np.array(
-                [("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None],
-                dtype=object,
-            ),
-            1,
+            [("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None],
         ],
         [
             "rpartition",
-            False,
-            np.array(
-                [("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None],
-                dtype=object,
-            ),
-            1,
+            [("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None],
         ],
     ],
 )
-def test_partition_index(method, expand, exp, exp_levels):
+def test_partition_index(method, exp):
     # https://github.com/pandas-dev/pandas/issues/23558
 
     values = Index(["a_b_c", "c_d_e", "f_g_h", np.nan, None])
 
-    result = getattr(values.str, method)("_", expand=expand)
-    exp = Index(exp)
+    result = getattr(values.str, method)("_", expand=False)
+    exp = Index(np.array(exp, dtype=object), dtype=object)
     tm.assert_index_equal(result, exp)
-    assert result.nlevels == exp_levels
+    assert result.nlevels == 1
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py
@@ -67,13 +67,7 @@ def test_catch_oob():
         pd.Timestamp("15000101").as_unit("ns")
 
 
-@pytest.mark.parametrize(
-    "is_local",
-    [
-        True,
-        False,
-    ],
-)
+@pytest.mark.parametrize("is_local", [True, False])
 def test_catch_undefined_variable_error(is_local):
     variable_name = "x"
     if is_local:
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
@@ -212,51 +212,54 @@ def test_to_datetime_format_YYYYMMDD_with_none(self, input_s):
         [
             # NaN before strings with invalid date values
             [
-                Series(["19801222", np.nan, "20010012", "10019999"]),
-                Series([Timestamp("19801222"), np.nan, np.nan, np.nan]),
+                ["19801222", np.nan, "20010012", "10019999"],
+                [Timestamp("19801222"), np.nan, np.nan, np.nan],
             ],
             # NaN after strings with invalid date values
             [
-                Series(["19801222", "20010012", "10019999", np.nan]),
-                Series([Timestamp("19801222"), np.nan, np.nan, np.nan]),
+                ["19801222", "20010012", "10019999", np.nan],
+                [Timestamp("19801222"), np.nan, np.nan, np.nan],
             ],
             # NaN before integers with invalid date values
             [
-                Series([20190813, np.nan, 20010012, 20019999]),
-                Series([Timestamp("20190813"), np.nan, np.nan, np.nan]),
+                [20190813, np.nan, 20010012, 20019999],
+                [Timestamp("20190813"), np.nan, np.nan, np.nan],
             ],
             # NaN after integers with invalid date values
             [
-                Series([20190813, 20010012, np.nan, 20019999]),
-                Series([Timestamp("20190813"), np.nan, np.nan, np.nan]),
+                [20190813, 20010012, np.nan, 20019999],
+                [Timestamp("20190813"), np.nan, np.nan, np.nan],
             ],
         ],
     )
     def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
         # GH 25512
         # format='%Y%m%d', errors='coerce'
+        input_s = Series(input_s)
         result = to_datetime(input_s, format="%Y%m%d", errors="coerce")
+        expected = Series(expected)
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize(
         "data, format, expected",
         [
-            ([pd.NA], "%Y%m%d%H%M%S", DatetimeIndex(["NaT"])),
-            ([pd.NA], None, DatetimeIndex(["NaT"])),
+            ([pd.NA], "%Y%m%d%H%M%S", ["NaT"]),
+            ([pd.NA], None, ["NaT"]),
             (
                 [pd.NA, "20210202202020"],
                 "%Y%m%d%H%M%S",
-                DatetimeIndex(["NaT", "2021-02-02 20:20:20"]),
+                ["NaT", "2021-02-02 20:20:20"],
             ),
-            (["201010", pd.NA], "%y%m%d", DatetimeIndex(["2020-10-10", "NaT"])),
-            (["201010", pd.NA], "%d%m%y", DatetimeIndex(["2010-10-20", "NaT"])),
-            ([None, np.nan, pd.NA], None, DatetimeIndex(["NaT", "NaT", "NaT"])),
-            ([None, np.nan, pd.NA], "%Y%m%d", DatetimeIndex(["NaT", "NaT", "NaT"])),
+            (["201010", pd.NA], "%y%m%d", ["2020-10-10", "NaT"]),
+            (["201010", pd.NA], "%d%m%y", ["2010-10-20", "NaT"]),
+            ([None, np.nan, pd.NA], None, ["NaT", "NaT", "NaT"]),
+            ([None, np.nan, pd.NA], "%Y%m%d", ["NaT", "NaT", "NaT"]),
         ],
     )
     def test_to_datetime_with_NA(self, data, format, expected):
         # GH#42957
         result = to_datetime(data, format=format)
+        expected = DatetimeIndex(expected)
         tm.assert_index_equal(result, expected)
 
     def test_to_datetime_with_NA_with_warning(self):
@@ -422,12 +425,12 @@ def test_parse_nanoseconds_with_formula(self, cache, arg):
     @pytest.mark.parametrize(
         "value,fmt,expected",
         [
-            ["2009324", "%Y%W%w", Timestamp("2009-08-13")],
-            ["2013020", "%Y%U%w", Timestamp("2013-01-13")],
+            ["2009324", "%Y%W%w", "2009-08-13"],
+            ["2013020", "%Y%U%w", "2013-01-13"],
         ],
     )
     def test_to_datetime_format_weeks(self, value, fmt, expected, cache):
-        assert to_datetime(value, format=fmt, cache=cache) == expected
+        assert to_datetime(value, format=fmt, cache=cache) == Timestamp(expected)
 
     @pytest.mark.parametrize(
         "fmt,dates,expected_dates",
@@ -715,24 +718,20 @@ def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_fal
         [
             pytest.param(
                 "%Y-%m-%d %H:%M:%S%z",
-                Index(
-                    [
-                        Timestamp("2000-01-01 09:00:00+0100", tz="UTC+01:00"),
-                        Timestamp("2000-01-02 02:00:00+0200", tz="UTC+02:00"),
-                        NaT,
-                    ]
-                ),
+                [
+                    Timestamp("2000-01-01 09:00:00+0100", tz="UTC+01:00"),
+                    Timestamp("2000-01-02 02:00:00+0200", tz="UTC+02:00"),
+                    NaT,
+                ],
                 id="ISO8601, non-UTC",
             ),
             pytest.param(
                 "%Y-%d-%m %H:%M:%S%z",
-                Index(
-                    [
-                        Timestamp("2000-01-01 09:00:00+0100", tz="UTC+01:00"),
-                        Timestamp("2000-02-01 02:00:00+0200", tz="UTC+02:00"),
-                        NaT,
-                    ]
-                ),
+                [
+                    Timestamp("2000-01-01 09:00:00+0100", tz="UTC+01:00"),
+                    Timestamp("2000-02-01 02:00:00+0200", tz="UTC+02:00"),
+                    NaT,
+                ],
                 id="non-ISO8601, non-UTC",
             ),
         ],
@@ -747,6 +746,7 @@ def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, expected):
                 format=fmt,
                 utc=False,
             )
+        expected = Index(expected)
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize(
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
@@ -596,21 +596,12 @@ def test_downcast_float64_to_float32():
     assert series.dtype == result.dtype
 
 
-@pytest.mark.parametrize(
-    "ser,expected",
-    [
-        (
-            Series([0, 9223372036854775808]),
-            Series([0, 9223372036854775808], dtype=np.uint64),
-        )
-    ],
-)
-def test_downcast_uint64(ser, expected):
+def test_downcast_uint64():
     # see gh-14422:
     # BUG: to_numeric doesn't work uint64 numbers
-
+    ser = Series([0, 9223372036854775808])
     result = to_numeric(ser, downcast="unsigned")
-
+    expected = Series([0, 9223372036854775808], dtype=np.uint64)
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py
@@ -98,13 +98,12 @@ def test_to_timedelta_oob_non_nano(self):
         with pytest.raises(OutOfBoundsTimedelta, match=msg):
             TimedeltaArray._from_sequence(arr, dtype="m8[s]")
 
-    @pytest.mark.parametrize(
-        "arg", [np.arange(10).reshape(2, 5), pd.DataFrame(np.arange(10).reshape(2, 5))]
-    )
+    @pytest.mark.parametrize("box", [lambda x: x, pd.DataFrame])
     @pytest.mark.parametrize("errors", ["ignore", "raise", "coerce"])
     @pytest.mark.filterwarnings("ignore:errors='ignore' is deprecated:FutureWarning")
-    def test_to_timedelta_dataframe(self, arg, errors):
+    def test_to_timedelta_dataframe(self, box, errors):
         # GH 11776
+        arg = box(np.arange(10).reshape(2, 5))
         with pytest.raises(TypeError, match="1-d array"):
             to_timedelta(arg, errors=errors)
 
diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py
@@ -300,21 +300,14 @@ class SubDatetime(datetime):
     pass
 
 
-@pytest.mark.parametrize(
-    "data,expected",
-    [
-        ([SubDatetime(2000, 1, 1)], ["2000-01-01T00:00:00.000000000"]),
-        ([datetime(2000, 1, 1)], ["2000-01-01T00:00:00.000000000"]),
-        ([Timestamp(2000, 1, 1)], ["2000-01-01T00:00:00.000000000"]),
-    ],
-)
-def test_datetime_subclass(data, expected):
+@pytest.mark.parametrize("klass", [SubDatetime, datetime, Timestamp])
+def test_datetime_subclass(klass):
     # GH 25851
     # ensure that subclassed datetime works with
     # array_to_datetime
 
-    arr = np.array(data, dtype=object)
+    arr = np.array([klass(2000, 1, 1)], dtype=object)
     result, _ = tslib.array_to_datetime(arr)
 
-    expected = np.array(expected, dtype="M8[ns]")
+    expected = np.array(["2000-01-01T00:00:00.000000000"], dtype="M8[ns]")
     tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py
@@ -86,11 +86,12 @@ def test_tz_convert_single_matches_tz_convert(tz_aware_fixture, freq):
 @pytest.mark.parametrize(
     "arr",
     [
-        pytest.param(np.array([], dtype=np.int64), id="empty"),
-        pytest.param(np.array([iNaT], dtype=np.int64), id="all_nat"),
+        pytest.param([], id="empty"),
+        pytest.param([iNaT], id="all_nat"),
     ],
 )
 def test_tz_convert_corner(arr):
+    arr = np.array(arr, dtype=np.int64)  # wrap the parametrized values
     result = tz_convert_from_utc(arr, timezones.maybe_get_tz("Asia/Tokyo"))
     tm.assert_numpy_array_equal(result, arr)
 
diff --git a/pandas/tests/tslibs/test_liboffsets.py b/pandas/tests/tslibs/test_liboffsets.py
@@ -127,10 +127,8 @@ def test_get_day_of_month_error():
         roll_qtrday(dt, n=3, month=11, day_opt=day_opt, modby=12)
 
 
-@pytest.mark.parametrize(
-    "month",
-    [3, 5],  # (other.month % 3) < (month % 3)  # (other.month % 3) > (month % 3)
-)
+@pytest.mark.parametrize("month", [3, 5])
+# (other.month % 3) < (month % 3)  # (other.month % 3) > (month % 3)
 @pytest.mark.parametrize("n", [4, -3])
 def test_roll_qtr_day_not_mod_unequal(day_opt, month, n):
     expected = {3: {-3: -2, 4: 4}, 5: {-3: -3, 4: 3}}
diff --git a/pandas/tests/util/test_assert_almost_equal.py b/pandas/tests/util/test_assert_almost_equal.py
@@ -257,18 +257,14 @@ def test_assert_almost_equal_strings():
     _assert_almost_equal_both("abc", "abc")
 
 
-@pytest.mark.parametrize(
-    "a,b", [("abc", "abcd"), ("abc", "abd"), ("abc", 1), ("abc", [1])]
-)
-def test_assert_not_almost_equal_strings(a, b):
-    _assert_not_almost_equal_both(a, b)
+@pytest.mark.parametrize("b", ["abcd", "abd", 1, [1]])
+def test_assert_not_almost_equal_strings(b):
+    _assert_not_almost_equal_both("abc", b)
 
 
-@pytest.mark.parametrize(
-    "a,b", [([1, 2, 3], [1, 2, 3]), (np.array([1, 2, 3]), np.array([1, 2, 3]))]
-)
-def test_assert_almost_equal_iterables(a, b):
-    _assert_almost_equal_both(a, b)
+@pytest.mark.parametrize("box", [list, np.array])
+def test_assert_almost_equal_iterables(box):
+    _assert_almost_equal_both(box([1, 2, 3]), box([1, 2, 3]))
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/util/test_assert_categorical_equal.py b/pandas/tests/util/test_assert_categorical_equal.py
@@ -4,11 +4,9 @@
 import pandas._testing as tm
 
 
-@pytest.mark.parametrize(
-    "c",
-    [Categorical([1, 2, 3, 4]), Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4, 5])],
-)
+@pytest.mark.parametrize("c", [None, [1, 2, 3, 4, 5]])
 def test_categorical_equal(c):
+    c = Categorical([1, 2, 3, 4], categories=c)
     tm.assert_categorical_equal(c, c)
 
 
diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py
diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py
diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py
diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py

Original file line number	Diff line number	Diff line change
`@@ -86,11 +86,12 @@ def test_tz_convert_single_matches_tz_convert(tz_aware_fixture, freq):`
`86`	`86`	`@pytest.mark.parametrize(`
`87`	`87`	`"arr",`
`88`	`88`	`[`
`89`		`- pytest.param(np.array([], dtype=np.int64), id="empty"),`
`90`		`- pytest.param(np.array([iNaT], dtype=np.int64), id="all_nat"),`
	`89`	`+ pytest.param([], id="empty"),`
	`90`	`+ pytest.param([iNaT], id="all_nat"),`
`91`	`91`	`],`
`92`	`92`	`)`
`93`	`93`	`def test_tz_convert_corner(arr):`
	`94`	`+ arr = np.array(arr, dtype=np.int64)`
`94`	`95`	`result = tz_convert_from_utc(arr, timezones.maybe_get_tz("Asia/Tokyo"))`
`95`	`96`	`tm.assert_numpy_array_equal(result, arr)`
`96`	`97`