Skip to content

TST: Simplify more slow tests #53862

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into the base branch from the contributor's branch
Jun 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,21 +104,24 @@ def test_compare_invalid(self):
b.name = pd.Timestamp("2000-01-01")
tm.assert_series_equal(a / b, 1 / (b / a))

def test_numeric_cmp_string_numexpr_path(self, box_with_array):
def test_numeric_cmp_string_numexpr_path(self, box_with_array, monkeypatch):
# GH#36377, GH#35700
box = box_with_array
xbox = box if box is not Index else np.ndarray

obj = Series(np.random.randn(10**5))
obj = Series(np.random.randn(51))
obj = tm.box_expected(obj, box, transpose=False)
with monkeypatch.context() as m:
m.setattr(expr, "_MIN_ELEMENTS", 50)
result = obj == "a"

result = obj == "a"

expected = Series(np.zeros(10**5, dtype=bool))
expected = Series(np.zeros(51, dtype=bool))
expected = tm.box_expected(expected, xbox, transpose=False)
tm.assert_equal(result, expected)

result = obj != "a"
with monkeypatch.context() as m:
m.setattr(expr, "_MIN_ELEMENTS", 50)
result = obj != "a"
tm.assert_equal(result, ~expected)

msg = "Invalid comparison between dtype=float64 and str"
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,9 @@ def test_intercept_builtin_sum():
@pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key
def test_builtins_apply(keys, f):
# see gh-8155
df = DataFrame(np.random.randint(1, 50, (1000, 2)), columns=["jim", "joe"])
df["jolie"] = np.random.randn(1000)
rs = np.random.RandomState(42)
df = DataFrame(rs.randint(1, 7, (10, 2)), columns=["jim", "joe"])
df["jolie"] = rs.randn(10)

gb = df.groupby(keys)

Expand Down
17 changes: 9 additions & 8 deletions pandas/tests/indexes/base_class/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,18 @@ def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch):
# Go through the libindex path for which using
# _bin_search vs ndarray.searchsorted makes a difference

monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100)
lev = list("ABCD")
dti = pd.date_range("2016-01-01", periods=10)
with monkeypatch.context():
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100)
lev = list("ABCD")
dti = pd.date_range("2016-01-01", periods=10)

mi = pd.MultiIndex.from_product([lev, range(5), dti])
oidx = mi.to_flat_index()
mi = pd.MultiIndex.from_product([lev, range(5), dti])
oidx = mi.to_flat_index()

loc = len(oidx) // 2
tup = oidx[loc]
loc = len(oidx) // 2
tup = oidx[loc]

res = oidx.get_loc(tup)
res = oidx.get_loc(tup)
assert res == loc

def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self):
Expand Down
63 changes: 34 additions & 29 deletions pandas/tests/indexes/multi/test_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import numpy as np
import pytest

from pandas._libs import hashtable
from pandas._libs import (
hashtable,
index as libindex,
)

from pandas import (
NA,
Expand Down Expand Up @@ -232,41 +235,43 @@ def test_duplicated(idx_dup, keep, expected):


@pytest.mark.arm_slow
def test_duplicated_large(keep):
def test_duplicated_hashtable_impl(keep, monkeypatch):
# GH 9125
n, k = 200, 5000
n, k = 6, 10
levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
codes = [np.random.choice(n, k * n) for lev in levels]
mi = MultiIndex(levels=levels, codes=codes)
codes = [np.random.choice(n, k * n) for _ in levels]
with monkeypatch.context() as m:
m.setattr(libindex, "_SIZE_CUTOFF", 50)
mi = MultiIndex(levels=levels, codes=codes)

result = mi.duplicated(keep=keep)
expected = hashtable.duplicated(mi.values, keep=keep)
result = mi.duplicated(keep=keep)
expected = hashtable.duplicated(mi.values, keep=keep)
tm.assert_numpy_array_equal(result, expected)


def test_duplicated2():
# TODO: more informative test name
@pytest.mark.parametrize("val", [101, 102])
def test_duplicated_with_nan(val):
# GH5873
mi = MultiIndex.from_arrays([[101, val], [3.5, np.nan]])
assert not mi.has_duplicates

tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))


@pytest.mark.parametrize("n", range(1, 6))
@pytest.mark.parametrize("m", range(1, 5))
def test_duplicated_with_nan_multi_shape(n, m):
# GH5873
for a in [101, 102]:
mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
assert not mi.has_duplicates

tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))

for n in range(1, 6): # 1st level shape
for m in range(1, 5): # 2nd level shape
# all possible unique combinations, including nan
codes = product(range(-1, n), range(-1, m))
mi = MultiIndex(
levels=[list("abcde")[:n], list("WXYZ")[:m]],
codes=np.random.permutation(list(codes)).T,
)
assert len(mi) == (n + 1) * (m + 1)
assert not mi.has_duplicates

tm.assert_numpy_array_equal(
mi.duplicated(), np.zeros(len(mi), dtype="bool")
)
# all possible unique combinations, including nan
codes = product(range(-1, n), range(-1, m))
mi = MultiIndex(
levels=[list("abcde")[:n], list("WXYZ")[:m]],
codes=np.random.permutation(list(codes)).T,
)
assert len(mi) == (n + 1) * (m + 1)
assert not mi.has_duplicates

tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(len(mi), dtype="bool"))


def test_duplicated_drop_duplicates():
Expand Down
11 changes: 6 additions & 5 deletions pandas/tests/indexes/multi/test_integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas._libs import index as libindex

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike

import pandas as pd
Expand Down Expand Up @@ -186,12 +188,11 @@ def test_large_multiindex_error():
df_above_1000000.loc[(3, 0), "dest"]


def test_million_record_attribute_error():
def test_mi_hashtable_populated_attribute_error(monkeypatch):
# GH 18165
r = list(range(1000000))
df = pd.DataFrame(
{"a": r, "b": r}, index=MultiIndex.from_tuples([(x, x) for x in r])
)
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50)
r = range(50)
df = pd.DataFrame({"a": r, "b": r}, index=MultiIndex.from_arrays([r, r]))

msg = "'Series' object has no attribute 'foo'"
with pytest.raises(AttributeError, match=msg):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def test_gzip_reproducibility_file_name():
with tm.ensure_clean() as path:
path = Path(path)
df.to_csv(path, compression=compression_options)
time.sleep(2)
time.sleep(0.1)
output = path.read_bytes()
df.to_csv(path, compression=compression_options)
assert output == path.read_bytes()
Expand All @@ -196,7 +196,7 @@ def test_gzip_reproducibility_file_object():
buffer = io.BytesIO()
df.to_csv(buffer, compression=compression_options, mode="wb")
output = buffer.getvalue()
time.sleep(2)
time.sleep(0.1)
buffer = io.BytesIO()
df.to_csv(buffer, compression=compression_options, mode="wb")
assert output == buffer.getvalue()
Expand Down
20 changes: 11 additions & 9 deletions pandas/tests/plotting/frame/test_frame_subplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ def test_bar_log_subplots(self):
tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected)
tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected)

def test_boxplot_subplots_return_type(self, hist_df):
def test_boxplot_subplots_return_type_default(self, hist_df):
df = hist_df

# normal style: return_type=None
Expand All @@ -414,14 +414,16 @@ def test_boxplot_subplots_return_type(self, hist_df):
result, None, expected_keys=["height", "weight", "category"]
)

for t in ["dict", "axes", "both"]:
returned = df.plot.box(return_type=t, subplots=True)
_check_box_return_type(
returned,
t,
expected_keys=["height", "weight", "category"],
check_ax_title=False,
)
@pytest.mark.parametrize("rt", ["dict", "axes", "both"])
def test_boxplot_subplots_return_type(self, hist_df, rt):
df = hist_df
returned = df.plot.box(return_type=rt, subplots=True)
_check_box_return_type(
returned,
rt,
expected_keys=["height", "weight", "category"],
check_ax_title=False,
)

def test_df_subplots_patterns_minorticks(self):
# GH 10657
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/tslibs/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ def test_tz_convert_single_matches_tz_convert_hourly(tz_aware_fixture):
@pytest.mark.parametrize("freq", ["D", "A"])
def test_tz_convert_single_matches_tz_convert(tz_aware_fixture, freq):
tz = tz_aware_fixture
tz_didx = date_range("2000-01-01", "2020-01-01", freq=freq, tz=tz)
naive_didx = date_range("2000-01-01", "2020-01-01", freq=freq)
tz_didx = date_range("2018-01-01", "2020-01-01", freq=freq, tz=tz)
naive_didx = date_range("2018-01-01", "2020-01-01", freq=freq)

_compare_utc_to_local(tz_didx)
_compare_local_to_utc(tz_didx, naive_didx)
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/window/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,22 @@ def numeric_only(request):
return request.param


@pytest.fixture(params=[pytest.param("numba", marks=td.skip_if_no("numba")), "cython"])
@pytest.fixture(
params=[
pytest.param("numba", marks=[td.skip_if_no("numba"), pytest.mark.single_cpu]),
"cython",
]
)
def engine(request):
"""engine keyword argument for rolling.apply"""
return request.param


@pytest.fixture(
params=[
pytest.param(("numba", True), marks=td.skip_if_no("numba")),
pytest.param(
("numba", True), marks=[td.skip_if_no("numba"), pytest.mark.single_cpu]
),
("cython", True),
("cython", False),
]
Expand Down