From 013757c1c8a2a68bb3b489841796d0bd8bf8201d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 24 Jun 2023 20:47:56 -0700 Subject: [PATCH 1/2] Clean more slow tests --- pandas/tests/arithmetic/test_numeric.py | 7 ++- pandas/tests/groupby/test_function.py | 5 +- pandas/tests/indexes/multi/test_duplicates.py | 56 ++++++++++--------- pandas/tests/indexes/multi/test_integrity.py | 11 ++-- pandas/tests/io/test_compression.py | 4 +- .../plotting/frame/test_frame_subplots.py | 20 ++++--- pandas/tests/tslibs/test_conversion.py | 4 +- pandas/tests/window/conftest.py | 11 +++- 8 files changed, 67 insertions(+), 51 deletions(-) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index a03c69d8e849c..ffb663d04b0f0 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -104,17 +104,18 @@ def test_compare_invalid(self): b.name = pd.Timestamp("2000-01-01") tm.assert_series_equal(a / b, 1 / (b / a)) - def test_numeric_cmp_string_numexpr_path(self, box_with_array): + def test_numeric_cmp_string_numexpr_path(self, box_with_array, monkeypatch): # GH#36377, GH#35700 + monkeypatch.setattr(expr, "_MIN_ELEMENTS", 50) box = box_with_array xbox = box if box is not Index else np.ndarray - obj = Series(np.random.randn(10**5)) + obj = Series(np.random.randn(51)) obj = tm.box_expected(obj, box, transpose=False) result = obj == "a" - expected = Series(np.zeros(10**5, dtype=bool)) + expected = Series(np.zeros(51, dtype=bool)) expected = tm.box_expected(expected, xbox, transpose=False) tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 0535bafc2a907..b97afe8ae9524 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -68,8 +68,9 @@ def test_intercept_builtin_sum(): @pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key def test_builtins_apply(keys, f): # see gh-8155 - df = DataFrame(np.random.randint(1, 50, (1000, 2)), columns=["jim", "joe"]) - df["jolie"] = np.random.randn(1000) + rs = np.random.RandomState(42) + df = DataFrame(rs.randint(1, 7, (10, 2)), columns=["jim", "joe"]) + df["jolie"] = rs.randn(10) gb = df.groupby(keys) diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 6c31caac4b42d..03e05426fc298 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -3,7 +3,10 @@ import numpy as np import pytest -from pandas._libs import hashtable +from pandas._libs import ( + hashtable, + index as libindex, +) from pandas import ( NA, @@ -232,11 +235,12 @@ def test_duplicated(idx_dup, keep, expected): @pytest.mark.arm_slow -def test_duplicated_large(keep): +def test_duplicated_hashtable_impl(keep, monkeypatch): # GH 9125 - n, k = 200, 5000 + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50) + n, k = 6, 10 levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] - codes = [np.random.choice(n, k * n) for lev in levels] + codes = [np.random.choice(n, k * n) for _ in levels] mi = MultiIndex(levels=levels, codes=codes) result = mi.duplicated(keep=keep) @@ -244,29 +248,29 @@ def test_duplicated_large(keep): tm.assert_numpy_array_equal(result, expected) -def test_duplicated2(): - # TODO: more informative test name +@pytest.mark.parametrize("val", [101, 102]) +def test_duplicated_with_nan(val): + # GH5873 + mi 
= MultiIndex.from_arrays([[101, val], [3.5, np.nan]]) + assert not mi.has_duplicates + + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool")) + + +@pytest.mark.parametrize("n", range(1, 6)) +@pytest.mark.parametrize("m", range(1, 5)) +def test_duplicated_with_nan_multi_shape(n, m): # GH5873 - for a in [101, 102]: - mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) - assert not mi.has_duplicates - - tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool")) - - for n in range(1, 6): # 1st level shape - for m in range(1, 5): # 2nd level shape - # all possible unique combinations, including nan - codes = product(range(-1, n), range(-1, m)) - mi = MultiIndex( - levels=[list("abcde")[:n], list("WXYZ")[:m]], - codes=np.random.permutation(list(codes)).T, - ) - assert len(mi) == (n + 1) * (m + 1) - assert not mi.has_duplicates - - tm.assert_numpy_array_equal( - mi.duplicated(), np.zeros(len(mi), dtype="bool") - ) + # all possible unique combinations, including nan + codes = product(range(-1, n), range(-1, m)) + mi = MultiIndex( + levels=[list("abcde")[:n], list("WXYZ")[:m]], + codes=np.random.permutation(list(codes)).T, + ) + assert len(mi) == (n + 1) * (m + 1) + assert not mi.has_duplicates + + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(len(mi), dtype="bool")) def test_duplicated_drop_duplicates(): diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index a650f7f81a19f..72b6754542fa6 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._libs import index as libindex + from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike import pandas as pd @@ -186,12 +188,11 @@ def test_large_multiindex_error(): df_above_1000000.loc[(3, 0), "dest"] -def test_million_record_attribute_error(): +def test_mi_hashtable_populated_attribute_error(monkeypatch): # GH 18165 - r = list(range(1000000)) - df = pd.DataFrame( - {"a": r, "b": r}, index=MultiIndex.from_tuples([(x, x) for x in r]) - ) + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50) + r = range(50) + df = pd.DataFrame({"a": r, "b": r}, index=MultiIndex.from_arrays([r, r])) msg = "'Series' object has no attribute 'foo'" with pytest.raises(AttributeError, match=msg): diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index c84670f0eb69c..9bdfbad347481 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -177,7 +177,7 @@ def test_gzip_reproducibility_file_name(): with tm.ensure_clean() as path: path = Path(path) df.to_csv(path, compression=compression_options) - time.sleep(2) + time.sleep(0.1) output = path.read_bytes() df.to_csv(path, compression=compression_options) assert output == path.read_bytes() @@ -196,7 +196,7 @@ def test_gzip_reproducibility_file_object(): buffer = io.BytesIO() df.to_csv(buffer, compression=compression_options, mode="wb") output = buffer.getvalue() - time.sleep(2) + time.sleep(0.1) buffer = io.BytesIO() df.to_csv(buffer, compression=compression_options, mode="wb") assert output == buffer.getvalue() diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 336fed6293070..8a0632cab8e2e 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -408,7 +408,7 @@ def test_bar_log_subplots(self): 
tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) - def test_boxplot_subplots_return_type(self, hist_df): + def test_boxplot_subplots_return_type_default(self, hist_df): df = hist_df # normal style: return_type=None @@ -418,14 +418,16 @@ def test_boxplot_subplots_return_type(self, hist_df): result, None, expected_keys=["height", "weight", "category"] ) - for t in ["dict", "axes", "both"]: - returned = df.plot.box(return_type=t, subplots=True) - _check_box_return_type( - returned, - t, - expected_keys=["height", "weight", "category"], - check_ax_title=False, - ) + @pytest.mark.parametrize("rt", ["dict", "axes", "both"]) + def test_boxplot_subplots_return_type(self, hist_df, rt): + df = hist_df + returned = df.plot.box(return_type=rt, subplots=True) + _check_box_return_type( + returned, + rt, + expected_keys=["height", "weight", "category"], + check_ax_title=False, + ) def test_df_subplots_patterns_minorticks(self): # GH 10657 diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index 42d2bc01f5e67..c1ab0ba0b5e6f 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -76,8 +76,8 @@ def test_tz_convert_single_matches_tz_convert_hourly(tz_aware_fixture): @pytest.mark.parametrize("freq", ["D", "A"]) def test_tz_convert_single_matches_tz_convert(tz_aware_fixture, freq): tz = tz_aware_fixture - tz_didx = date_range("2000-01-01", "2020-01-01", freq=freq, tz=tz) - naive_didx = date_range("2000-01-01", "2020-01-01", freq=freq) + tz_didx = date_range("2018-01-01", "2020-01-01", freq=freq, tz=tz) + naive_didx = date_range("2018-01-01", "2020-01-01", freq=freq) _compare_utc_to_local(tz_didx) _compare_local_to_utc(tz_didx, naive_didx) diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index 23fdb56f3a2b2..479829ccc7169 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -90,7 +90,12 @@ def numeric_only(request): return request.param -@pytest.fixture(params=[pytest.param("numba", marks=td.skip_if_no("numba")), "cython"]) +@pytest.fixture( + params=[ + pytest.param("numba", marks=[td.skip_if_no("numba"), pytest.mark.single_cpu]), + "cython", + ] +) def engine(request): """engine keyword argument for rolling.apply""" return request.param @@ -98,7 +103,9 @@ def engine(request): @pytest.fixture( params=[ - pytest.param(("numba", True), marks=td.skip_if_no("numba")), + pytest.param( + ("numba", True), marks=[td.skip_if_no("numba"), pytest.mark.single_cpu] + ), ("cython", True), ("cython", False), ] From 27849c7a795511afda60e07e0cc4ea2c13b68665 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 26 Jun 2023 13:24:58 -0700 Subject: [PATCH 2/2] Closer scope --- pandas/tests/arithmetic/test_numeric.py | 10 ++++++---- .../tests/indexes/base_class/test_indexing.py | 17 +++++++++-------- pandas/tests/indexes/multi/test_duplicates.py | 9 +++++---- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index ffb663d04b0f0..455cae084b7c6 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -106,20 +106,22 @@ def test_compare_invalid(self): def test_numeric_cmp_string_numexpr_path(self, box_with_array, monkeypatch): # GH#36377, GH#35700 - monkeypatch.setattr(expr, "_MIN_ELEMENTS", 50) box = box_with_array 
xbox = box if box is not Index else np.ndarray obj = Series(np.random.randn(51)) obj = tm.box_expected(obj, box, transpose=False) - - result = obj == "a" + with monkeypatch.context() as m: + m.setattr(expr, "_MIN_ELEMENTS", 50) + result = obj == "a" expected = Series(np.zeros(51, dtype=bool)) expected = tm.box_expected(expected, xbox, transpose=False) tm.assert_equal(result, expected) - result = obj != "a" + with monkeypatch.context() as m: + m.setattr(expr, "_MIN_ELEMENTS", 50) + result = obj != "a" tm.assert_equal(result, ~expected) msg = "Invalid comparison between dtype=float64 and str" diff --git a/pandas/tests/indexes/base_class/test_indexing.py b/pandas/tests/indexes/base_class/test_indexing.py index 4e5a50fab1a48..2988fa7d1baa1 100644 --- a/pandas/tests/indexes/base_class/test_indexing.py +++ b/pandas/tests/indexes/base_class/test_indexing.py @@ -64,17 +64,18 @@ def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch): # Go through the libindex path for which using # _bin_search vs ndarray.searchsorted makes a difference - monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100) - lev = list("ABCD") - dti = pd.date_range("2016-01-01", periods=10) + with monkeypatch.context(): + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100) + lev = list("ABCD") + dti = pd.date_range("2016-01-01", periods=10) - mi = pd.MultiIndex.from_product([lev, range(5), dti]) - oidx = mi.to_flat_index() + mi = pd.MultiIndex.from_product([lev, range(5), dti]) + oidx = mi.to_flat_index() - loc = len(oidx) // 2 - tup = oidx[loc] + loc = len(oidx) // 2 + tup = oidx[loc] - res = oidx.get_loc(tup) + res = oidx.get_loc(tup) assert res == loc def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self): diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 03e05426fc298..93f3e51d878c0 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -237,14 +237,15 @@ def test_duplicated(idx_dup, keep, expected): @pytest.mark.arm_slow def test_duplicated_hashtable_impl(keep, monkeypatch): # GH 9125 - monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50) n, k = 6, 10 levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] codes = [np.random.choice(n, k * n) for _ in levels] - mi = MultiIndex(levels=levels, codes=codes) + with monkeypatch.context() as m: + m.setattr(libindex, "_SIZE_CUTOFF", 50) + mi = MultiIndex(levels=levels, codes=codes) - result = mi.duplicated(keep=keep) - expected = hashtable.duplicated(mi.values, keep=keep) + result = mi.duplicated(keep=keep) + expected = hashtable.duplicated(mi.values, keep=keep) tm.assert_numpy_array_equal(result, expected)
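
A minimal standalone sketch of the test pattern applied throughout these commits: rather than allocating large inputs to cross an internal size threshold, a pytest monkeypatch lowers the threshold so small data already exercises the "large" code path, and monkeypatch.context() (the closer scoping introduced in the second commit) undoes the override as soon as the code under test has run. The module bigmod and its SIZE_CUTOFF attribute below are hypothetical stand-ins for internals such as pandas._libs.index._SIZE_CUTOFF and pandas.core.computation.expressions._MIN_ELEMENTS, not real pandas imports.

    import types

    import numpy as np

    # Hypothetical stand-in for a module exposing a size threshold that
    # selects a "large input" code path.
    bigmod = types.SimpleNamespace(SIZE_CUTOFF=1_000_000)

    def process(values):
        # Toy function under test: switches implementations above the cutoff.
        if len(values) > bigmod.SIZE_CUTOFF:
            return "hashtable", np.unique(values)
        return "linear", np.unique(values)

    def test_large_path_with_small_data(monkeypatch):
        values = np.arange(60)
        with monkeypatch.context() as m:
            # Lower the threshold so 60 elements already take the "large"
            # path, keeping the test fast; the override is undone as soon
            # as the with-block exits.
            m.setattr(bigmod, "SIZE_CUTOFF", 50)
            kind, result = process(values)
        assert kind == "hashtable"
        assert len(result) == 60

Run under pytest, the attribute is restored on exit from the with-block, so any later work in the same test (or other fixtures reusing the module) sees the real cutoff again.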