TST: Refactor more slow tests (#53800)

mroeschke · web-flow · commit a7a5b137f1d3 · 2023-06-23T10:14:05.000-07:00
* TST: Refactor more slow test

* more tests

* define less fixtures
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
@@ -697,6 +697,36 @@ def test_groupby_cum_skipna(op, skipna, input, exp):
     tm.assert_series_equal(expected, result)
 
 
+@pytest.fixture
+def frame():
+    floating = Series(np.random.randn(10))
+    floating_missing = floating.copy()
+    floating_missing.iloc[2:7] = np.nan
+    strings = list("abcde") * 2
+    strings_missing = strings[:]
+    strings_missing[5] = np.nan
+
+    df = DataFrame(
+        {
+            "float": floating,
+            "float_missing": floating_missing,
+            "int": [1, 1, 1, 1, 2] * 2,
+            "datetime": date_range("1990-1-1", periods=10),
+            "timedelta": pd.timedelta_range(1, freq="s", periods=10),
+            "string": strings,
+            "string_missing": strings_missing,
+            "cat": Categorical(strings),
+        },
+    )
+    return df
+
+
+@pytest.fixture
+def frame_mi(frame):
+    frame.index = MultiIndex.from_product([range(5), range(2)])
+    return frame
+
+
 @pytest.mark.slow
 @pytest.mark.parametrize(
     "op, args, targop",
@@ -707,100 +737,110 @@ def test_groupby_cum_skipna(op, skipna, input, exp):
         ("shift", (1,), lambda x: x.shift()),
     ],
 )
-def test_cython_transform_frame(op, args, targop):
-    s = Series(np.random.randn(1000))
-    s_missing = s.copy()
-    s_missing.iloc[2:10] = np.nan
-    labels = np.random.randint(0, 50, size=1000).astype(float)
-    strings = list("qwertyuiopasdfghjklz")
-    strings_missing = strings[:]
-    strings_missing[5] = np.nan
-    df = DataFrame(
-        {
-            "float": s,
-            "float_missing": s_missing,
-            "int": [1, 1, 1, 1, 2] * 200,
-            "datetime": date_range("1990-1-1", periods=1000),
-            "timedelta": pd.timedelta_range(1, freq="s", periods=1000),
-            "string": strings * 50,
-            "string_missing": strings_missing * 50,
-        },
-        columns=[
-            "float",
-            "float_missing",
-            "int",
-            "datetime",
-            "timedelta",
-            "string",
-            "string_missing",
-        ],
-    )
-    df["cat"] = df["string"].astype("category")
-
-    df2 = df.copy()
-    df2.index = MultiIndex.from_product([range(100), range(10)])
-
-    # DataFrame - Single and MultiIndex,
-    # group by values, index level, columns
-    for df in [df, df2]:
-        for gb_target in [
-            {"by": labels},
-            {"level": 0},
-            {"by": "string"},
-        ]:  # {"by": 'string_missing'}]:
-            # {"by": ['int','string']}]:
-            # TODO: remove or enable commented-out code
-
-            gb = df.groupby(group_keys=False, **gb_target)
-
-            if op != "shift" and "int" not in gb_target:
-                # numeric apply fastpath promotes dtype so have
-                # to apply separately and concat
-                i = gb[["int"]].apply(targop)
-                f = gb[["float", "float_missing"]].apply(targop)
-                expected = concat([f, i], axis=1)
-            else:
-                expected = gb.apply(targop)
-
-            expected = expected.sort_index(axis=1)
-            if op == "shift":
-                expected["string_missing"] = expected["string_missing"].fillna(
-                    np.nan, downcast=False
-                )
-                expected["string"] = expected["string"].fillna(np.nan, downcast=False)
-
-            result = gb[expected.columns].transform(op, *args).sort_index(axis=1)
-            tm.assert_frame_equal(result, expected)
-            result = getattr(gb[expected.columns], op)(*args).sort_index(axis=1)
-            tm.assert_frame_equal(result, expected)
-            # individual columns
-            for c in df:
-                if (
-                    c not in ["float", "int", "float_missing"]
-                    and op != "shift"
-                    and not (c == "timedelta" and op == "cumsum")
-                ):
-                    msg = "|".join(
-                        [
-                            "does not support .* operations",
-                            ".* is not supported for object dtype",
-                            "is not implemented for this dtype",
-                        ]
-                    )
-                    with pytest.raises(TypeError, match=msg):
-                        gb[c].transform(op)
-                    with pytest.raises(TypeError, match=msg):
-                        getattr(gb[c], op)()
-                else:
-                    expected = gb[c].apply(targop)
-                    expected.name = c
-                    if c in ["string_missing", "string"]:
-                        expected = expected.fillna(np.nan, downcast=False)
-
-                    res = gb[c].transform(op, *args)
-                    tm.assert_series_equal(expected, res)
-                    res2 = getattr(gb[c], op)(*args)
-                    tm.assert_series_equal(expected, res2)
+@pytest.mark.parametrize("df_fix", ["frame", "frame_mi"])
+@pytest.mark.parametrize(
+    "gb_target",
+    [
+        {"by": np.random.randint(0, 50, size=10).astype(float)},
+        {"level": 0},
+        {"by": "string"},
+        # {"by": 'string_missing'}]:
+        # {"by": ['int','string']}]:
+        # TODO: remove or enable commented-out code
+    ],
+)
+def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target):
+    df = request.getfixturevalue(df_fix)
+    gb = df.groupby(group_keys=False, **gb_target)
+
+    if op != "shift" and "int" not in gb_target:
+        # numeric apply fastpath promotes dtype so have
+        # to apply separately and concat
+        i = gb[["int"]].apply(targop)
+        f = gb[["float", "float_missing"]].apply(targop)
+        expected = concat([f, i], axis=1)
+    else:
+        expected = gb.apply(targop)
+
+    expected = expected.sort_index(axis=1)
+    if op == "shift":
+        expected["string_missing"] = expected["string_missing"].fillna(
+            np.nan, downcast=False
+        )
+        expected["string"] = expected["string"].fillna(np.nan, downcast=False)
+
+    result = gb[expected.columns].transform(op, *args).sort_index(axis=1)
+    tm.assert_frame_equal(result, expected)
+    result = getattr(gb[expected.columns], op)(*args).sort_index(axis=1)
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize(
+    "op, args, targop",
+    [
+        ("cumprod", (), lambda x: x.cumprod()),
+        ("cumsum", (), lambda x: x.cumsum()),
+        ("shift", (-1,), lambda x: x.shift(-1)),
+        ("shift", (1,), lambda x: x.shift()),
+    ],
+)
+@pytest.mark.parametrize("df_fix", ["frame", "frame_mi"])
+@pytest.mark.parametrize(
+    "gb_target",
+    [
+        {"by": np.random.randint(0, 50, size=10).astype(float)},
+        {"level": 0},
+        {"by": "string"},
+        # {"by": 'string_missing'}]:
+        # {"by": ['int','string']}]:
+        # TODO: remove or enable commented-out code
+    ],
+)
+@pytest.mark.parametrize(
+    "column",
+    [
+        "float",
+        "float_missing",
+        "int",
+        "datetime",
+        "timedelta",
+        "string",
+        "string_missing",
+    ],
+)
+def test_cython_transform_frame_column(
+    request, op, args, targop, df_fix, gb_target, column
+):
+    df = request.getfixturevalue(df_fix)
+    gb = df.groupby(group_keys=False, **gb_target)
+    c = column
+    if (
+        c not in ["float", "int", "float_missing"]
+        and op != "shift"
+        and not (c == "timedelta" and op == "cumsum")
+    ):
+        msg = "|".join(
+            [
+                "does not support .* operations",
+                ".* is not supported for object dtype",
+                "is not implemented for this dtype",
+            ]
+        )
+        with pytest.raises(TypeError, match=msg):
+            gb[c].transform(op)
+        with pytest.raises(TypeError, match=msg):
+            getattr(gb[c], op)()
+    else:
+        expected = gb[c].apply(targop)
+        expected.name = c
+        if c in ["string_missing", "string"]:
+            expected = expected.fillna(np.nan, downcast=False)
+
+        res = gb[c].transform(op, *args)
+        tm.assert_series_equal(expected, res)
+        res2 = getattr(gb[c], op)(*args)
+        tm.assert_series_equal(expected, res2)
 
 
 def test_transform_with_non_scalar_group():
diff --git a/pandas/tests/indexes/base_class/test_indexing.py b/pandas/tests/indexes/base_class/test_indexing.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+from pandas._libs import index as libindex
+
 import pandas as pd
 from pandas import (
     Index,
@@ -40,14 +42,15 @@ def test_get_indexer_non_unique_dtype_mismatch(self):
 
 class TestGetLoc:
     @pytest.mark.slow  # to_flat_index takes a while
-    def test_get_loc_tuple_monotonic_above_size_cutoff(self):
+    def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch):
         # Go through the libindex path for which using
         # _bin_search vs ndarray.searchsorted makes a difference
 
-        lev = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
-        dti = pd.date_range("2016-01-01", periods=100)
+        monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100)
+        lev = list("ABCD")
+        dti = pd.date_range("2016-01-01", periods=10)
 
-        mi = pd.MultiIndex.from_product([lev, range(10**3), dti])
+        mi = pd.MultiIndex.from_product([lev, range(5), dti])
         oidx = mi.to_flat_index()
 
         loc = len(oidx) // 2
diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py
@@ -11,12 +11,12 @@
 
 @pytest.fixture
 def m():
-    return 50
+    return 5
 
 
 @pytest.fixture
 def n():
-    return 1000
+    return 100
 
 
 @pytest.fixture
diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py
@@ -51,7 +51,20 @@ def test_stacked_boxplot_set_axis(self):
         )
 
     @pytest.mark.slow
-    def test_boxplot_legacy1(self):
+    @pytest.mark.parametrize(
+        "kwargs, warn",
+        [
+            [{"return_type": "dict"}, None],
+            [{"column": ["one", "two"]}, None],
+            [{"column": ["one", "two"], "by": "indic"}, UserWarning],
+            [{"column": ["one"], "by": ["indic", "indic2"]}, None],
+            [{"by": "indic"}, UserWarning],
+            [{"by": ["indic", "indic2"]}, UserWarning],
+            [{"notch": 1}, None],
+            [{"by": "indic", "notch": 1}, UserWarning],
+        ],
+    )
+    def test_boxplot_legacy1(self, kwargs, warn):
         df = DataFrame(
             np.random.randn(6, 4),
             index=list(string.ascii_letters[:6]),
@@ -60,20 +73,13 @@ def test_boxplot_legacy1(self):
         df["indic"] = ["foo", "bar"] * 3
         df["indic2"] = ["foo", "bar", "foo"] * 2
 
-        _check_plot_works(df.boxplot, return_type="dict")
-        _check_plot_works(df.boxplot, column=["one", "two"], return_type="dict")
-        # _check_plot_works adds an ax so catch warning. see GH #13188
-        with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
-            _check_plot_works(df.boxplot, column=["one", "two"], by="indic")
-        _check_plot_works(df.boxplot, column="one", by=["indic", "indic2"])
-        with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
-            _check_plot_works(df.boxplot, by="indic")
-        with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
-            _check_plot_works(df.boxplot, by=["indic", "indic2"])
-        _check_plot_works(plotting._core.boxplot, data=df["one"], return_type="dict")
-        _check_plot_works(df.boxplot, notch=1, return_type="dict")
-        with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
-            _check_plot_works(df.boxplot, by="indic", notch=1)
+        # _check_plot_works can add an ax so catch warning. see GH #13188
+        with tm.assert_produces_warning(warn, check_stacklevel=False):
+            _check_plot_works(df.boxplot, **kwargs)
+
+    def test_boxplot_legacy1_series(self):
+        ser = Series(np.random.randn(6))
+        _check_plot_works(plotting._core.boxplot, data=ser, return_type="dict")
 
     def test_boxplot_legacy2(self):
         df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"])
@@ -347,17 +353,21 @@ def test_boxplot_legacy2(self):
         axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
         _check_axes_shape(axes, axes_num=1, layout=(1, 1))
 
-    def test_boxplot_legacy3(self):
+    @pytest.mark.parametrize(
+        "subplots, warn, axes_num, layout",
+        [[True, UserWarning, 3, (2, 2)], [False, None, 1, (1, 1)]],
+    )
+    def test_boxplot_legacy3(self, subplots, warn, axes_num, layout):
         tuples = zip(string.ascii_letters[:10], range(10))
         df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples))
         msg = "DataFrame.groupby with axis=1 is deprecated"
         with tm.assert_produces_warning(FutureWarning, match=msg):
             grouped = df.unstack(level=1).groupby(level=0, axis=1)
-        with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
-            axes = _check_plot_works(grouped.boxplot, return_type="axes")
-        _check_axes_shape(list(axes.values), axes_num=3, layout=(2, 2))
-        axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
-        _check_axes_shape(axes, axes_num=1, layout=(1, 1))
+        with tm.assert_produces_warning(warn, check_stacklevel=False):
+            axes = _check_plot_works(
+                grouped.boxplot, subplots=subplots, return_type="axes"
+            )
+        _check_axes_shape(axes, axes_num=axes_num, layout=layout)
 
     def test_grouped_plot_fignums(self):
         n = 10
diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py
diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py