@@ -3,7 +3,10 @@
 import numpy as np
 import pytest
 
-from pandas._libs import hashtable
+from pandas._libs import (
+    hashtable,
+    index as libindex,
+)
 
 from pandas import (
     NA,
@@ -232,41 +235,43 @@ def test_duplicated(idx_dup, keep, expected):
 
 
 @pytest.mark.arm_slow
-def test_duplicated_large(keep):
+def test_duplicated_hashtable_impl(keep, monkeypatch):
     # GH 9125
-    n, k = 200, 5000
+    n, k = 6, 10
     levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
-    codes = [np.random.choice(n, k * n) for lev in levels]
-    mi = MultiIndex(levels=levels, codes=codes)
+    codes = [np.random.choice(n, k * n) for _ in levels]
+    with monkeypatch.context() as m:
+        m.setattr(libindex, "_SIZE_CUTOFF", 50)
+        mi = MultiIndex(levels=levels, codes=codes)
 
-    result = mi.duplicated(keep=keep)
-    expected = hashtable.duplicated(mi.values, keep=keep)
+        result = mi.duplicated(keep=keep)
+        expected = hashtable.duplicated(mi.values, keep=keep)
     tm.assert_numpy_array_equal(result, expected)
 
 
-def test_duplicated2():
-    # TODO: more informative test name
+@pytest.mark.parametrize("val", [101, 102])
+def test_duplicated_with_nan(val):
+    # GH5873
+    mi = MultiIndex.from_arrays([[101, val], [3.5, np.nan]])
+    assert not mi.has_duplicates
+
+    tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))
+
+
+@pytest.mark.parametrize("n", range(1, 6))
+@pytest.mark.parametrize("m", range(1, 5))
+def test_duplicated_with_nan_multi_shape(n, m):
     # GH5873
-    for a in [101, 102]:
-        mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
-        assert not mi.has_duplicates
-
-        tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))
-
-    for n in range(1, 6):  # 1st level shape
-        for m in range(1, 5):  # 2nd level shape
-            # all possible unique combinations, including nan
-            codes = product(range(-1, n), range(-1, m))
-            mi = MultiIndex(
-                levels=[list("abcde")[:n], list("WXYZ")[:m]],
-                codes=np.random.permutation(list(codes)).T,
-            )
-            assert len(mi) == (n + 1) * (m + 1)
-            assert not mi.has_duplicates
-
-            tm.assert_numpy_array_equal(
-                mi.duplicated(), np.zeros(len(mi), dtype="bool")
-            )
+    # all possible unique combinations, including nan
+    codes = product(range(-1, n), range(-1, m))
+    mi = MultiIndex(
+        levels=[list("abcde")[:n], list("WXYZ")[:m]],
+        codes=np.random.permutation(list(codes)).T,
+    )
+    assert len(mi) == (n + 1) * (m + 1)
+    assert not mi.has_duplicates
+
+    tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(len(mi), dtype="bool"))
 
 
 def test_duplicated_drop_duplicates():
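A note on the first rewritten test: the old fixture built a 200 * 5000 = 1,000,000-row index purely to cross a size threshold, while the new version lowers `libindex._SIZE_CUTOFF` to 50 so a 60-row index (k * n = 10 * 6) takes the same code path. Judging by the new test name, that cutoff appears to be the point where pandas switches to the hashtable-backed `duplicated` implementation. Below is a minimal sketch of the `monkeypatch.context()` pattern used here; the `fakelib` module and its `SIZE_CUTOFF` are hypothetical stand-ins for `pandas._libs.index`, not pandas APIs:

```python
import types

import pytest

# Hypothetical stand-in for a module with a size threshold like
# pandas._libs.index._SIZE_CUTOFF in the diff above.
fakelib = types.SimpleNamespace(SIZE_CUTOFF=1_000_000)


def choose_impl(n_rows):
    # Pretend dispatch keyed on the module-level cutoff.
    return "hashtable" if n_rows > fakelib.SIZE_CUTOFF else "naive"


def test_small_input_hits_hashtable_path(monkeypatch):
    with monkeypatch.context() as m:
        # Patched only inside this block; pytest restores the
        # original value on exit, even if an assertion fails.
        m.setattr(fakelib, "SIZE_CUTOFF", 50)
        assert choose_impl(60) == "hashtable"
    assert choose_impl(60) == "naive"  # cutoff restored
```

Because `monkeypatch.context()` undoes the patch when the block exits, a failing assertion cannot leak the lowered cutoff into later tests.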
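The second change splits one test in two and converts the nested `for` loops into stacked `@pytest.mark.parametrize` decorators. Stacked decorators generate the cross-product of their argument sets, so each `(n, m)` shape runs and reports as its own test case, just like the old nesting. A minimal sketch:

```python
import pytest


# Stacked parametrize decorators expand to the cross-product of cases:
# this generates 5 * 4 = 20 independent tests, one per (n, m) pair,
# mirroring the nested loops removed in the diff above.
@pytest.mark.parametrize("n", range(1, 6))
@pytest.mark.parametrize("m", range(1, 5))
def test_cross_product(n, m):
    assert (n, m) in [(i, j) for i in range(1, 6) for j in range(1, 5)]
```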