Skip to content

TST: Refactor more slow tests #53800

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
228 changes: 134 additions & 94 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,36 @@ def test_groupby_cum_skipna(op, skipna, input, exp):
tm.assert_series_equal(expected, result)


@pytest.fixture
def frame():
    """
    Ten-row DataFrame with one column per dtype used by the transform tests.

    The ``float_missing`` column copies ``float`` with positions 2 through 6
    set to NaN; ``string_missing`` copies ``string`` with position 5 set to
    NaN. ``cat`` is a Categorical built from the same letters as ``string``.
    """
    letters = list("abcde") * 2
    letters_missing = list(letters)
    letters_missing[5] = np.nan

    values = Series(np.random.randn(10))
    values_missing = values.copy()
    values_missing.iloc[2:7] = np.nan

    data = {
        "float": values,
        "float_missing": values_missing,
        "int": [1, 1, 1, 1, 2] * 2,
        "datetime": date_range("1990-1-1", periods=10),
        "timedelta": pd.timedelta_range(1, freq="s", periods=10),
        "string": letters,
        "string_missing": letters_missing,
        "cat": Categorical(letters),
    }
    return DataFrame(data)


@pytest.fixture
def frame_mi(frame):
    """
    The ``frame`` fixture with its index replaced by a 5 x 2 MultiIndex.

    The index is assigned on the object received from ``frame`` and that
    same object is returned.
    """
    levels = [range(5), range(2)]
    frame.index = MultiIndex.from_product(levels)
    return frame


@pytest.mark.slow
@pytest.mark.parametrize(
"op, args, targop",
Expand All @@ -707,100 +737,110 @@ def test_groupby_cum_skipna(op, skipna, input, exp):
("shift", (1,), lambda x: x.shift()),
],
)
def test_cython_transform_frame(op, args, targop):
# Pre-refactor form of the test: a 1000-row frame is built inline and every
# index layout, group target and column is checked inside this one body.
s = Series(np.random.randn(1000))
s_missing = s.copy()
# Positions 2 through 9 become NaN for the "float_missing" column.
s_missing.iloc[2:10] = np.nan
# Float-valued group labels used for the {"by": labels} target below.
labels = np.random.randint(0, 50, size=1000).astype(float)
strings = list("qwertyuiopasdfghjklz")
strings_missing = strings[:]
strings_missing[5] = np.nan
df = DataFrame(
{
"float": s,
"float_missing": s_missing,
"int": [1, 1, 1, 1, 2] * 200,
"datetime": date_range("1990-1-1", periods=1000),
"timedelta": pd.timedelta_range(1, freq="s", periods=1000),
"string": strings * 50,
"string_missing": strings_missing * 50,
},
columns=[
"float",
"float_missing",
"int",
"datetime",
"timedelta",
"string",
"string_missing",
],
)
df["cat"] = df["string"].astype("category")

# Same data, but indexed by a 100 x 10 MultiIndex.
df2 = df.copy()
df2.index = MultiIndex.from_product([range(100), range(10)])

# DataFrame - Single and MultiIndex,
# group by values, index level, columns
for df in [df, df2]:
for gb_target in [
{"by": labels},
{"level": 0},
{"by": "string"},
]: # {"by": 'string_missing'}]:
# {"by": ['int','string']}]:
# TODO: remove or enable commented-out code

gb = df.groupby(group_keys=False, **gb_target)

# NOTE(review): gb_target is a dict, so `"int" not in gb_target` tests its
# keys ("by" / "level"), not the grouping column -- confirm this is intended.
if op != "shift" and "int" not in gb_target:
# numeric apply fastpath promotes dtype so have
# to apply separately and concat
i = gb[["int"]].apply(targop)
f = gb[["float", "float_missing"]].apply(targop)
expected = concat([f, i], axis=1)
else:
expected = gb.apply(targop)

expected = expected.sort_index(axis=1)
if op == "shift":
expected["string_missing"] = expected["string_missing"].fillna(
np.nan, downcast=False
)
expected["string"] = expected["string"].fillna(np.nan, downcast=False)

# Both spellings are compared against the apply-based expectation:
# .transform(op, *args) and the direct method call.
result = gb[expected.columns].transform(op, *args).sort_index(axis=1)
tm.assert_frame_equal(result, expected)
result = getattr(gb[expected.columns], op)(*args).sort_index(axis=1)
tm.assert_frame_equal(result, expected)
# individual columns
for c in df:
# Columns other than float/int/float_missing are expected to raise
# TypeError for the non-shift ops, except cumsum on "timedelta".
if (
c not in ["float", "int", "float_missing"]
and op != "shift"
and not (c == "timedelta" and op == "cumsum")
):
msg = "|".join(
[
"does not support .* operations",
".* is not supported for object dtype",
"is not implemented for this dtype",
]
)
with pytest.raises(TypeError, match=msg):
gb[c].transform(op)
with pytest.raises(TypeError, match=msg):
getattr(gb[c], op)()
else:
expected = gb[c].apply(targop)
expected.name = c
if c in ["string_missing", "string"]:
expected = expected.fillna(np.nan, downcast=False)

res = gb[c].transform(op, *args)
tm.assert_series_equal(expected, res)
res2 = getattr(gb[c], op)(*args)
tm.assert_series_equal(expected, res2)
@pytest.mark.parametrize("df_fix", ["frame", "frame_mi"])
@pytest.mark.parametrize(
    "gb_target",
    [
        {"by": np.random.randint(0, 50, size=10).astype(float)},
        {"level": 0},
        {"by": "string"},
        # TODO: remove or enable the targets below
        # {"by": "string_missing"},
        # {"by": ["int", "string"]},
    ],
)
def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target):
    # Frame-level check: transform(op) and the direct groupby method must both
    # match what apply(targop) produces for the same grouping.
    df = request.getfixturevalue(df_fix)
    gb = df.groupby(group_keys=False, **gb_target)

    # gb_target is a dict, so the membership test looks at its keys.
    if op == "shift" or "int" in gb_target:
        expected = gb.apply(targop)
    else:
        # numeric apply fastpath promotes dtype, so the int and float columns
        # are applied separately and then concatenated
        int_part = gb[["int"]].apply(targop)
        float_part = gb[["float", "float_missing"]].apply(targop)
        expected = concat([float_part, int_part], axis=1)

    expected = expected.sort_index(axis=1)
    if op == "shift":
        for name in ("string_missing", "string"):
            expected[name] = expected[name].fillna(np.nan, downcast=False)

    via_transform = gb[expected.columns].transform(op, *args)
    tm.assert_frame_equal(via_transform.sort_index(axis=1), expected)

    method = getattr(gb[expected.columns], op)
    via_method = method(*args)
    tm.assert_frame_equal(via_method.sort_index(axis=1), expected)


@pytest.mark.slow
@pytest.mark.parametrize(
    "op, args, targop",
    [
        ("cumprod", (), lambda x: x.cumprod()),
        ("cumsum", (), lambda x: x.cumsum()),
        ("shift", (-1,), lambda x: x.shift(-1)),
        ("shift", (1,), lambda x: x.shift()),
    ],
)
@pytest.mark.parametrize("df_fix", ["frame", "frame_mi"])
@pytest.mark.parametrize(
    "gb_target",
    [
        {"by": np.random.randint(0, 50, size=10).astype(float)},
        {"level": 0},
        {"by": "string"},
        # TODO: remove or enable the targets below
        # {"by": "string_missing"},
        # {"by": ["int", "string"]},
    ],
)
@pytest.mark.parametrize(
    "column",
    [
        "float",
        "float_missing",
        "int",
        "datetime",
        "timedelta",
        "string",
        "string_missing",
    ],
)
def test_cython_transform_frame_column(
    request, op, args, targop, df_fix, gb_target, column
):
    # Single-column check: either the op is expected to raise TypeError for
    # this column, or transform(op) / the direct method must match apply(targop).
    df = request.getfixturevalue(df_fix)
    gb = df.groupby(group_keys=False, **gb_target)

    numeric_columns = ["float", "int", "float_missing"]
    timedelta_cumsum = column == "timedelta" and op == "cumsum"

    if column not in numeric_columns and op != "shift" and not timedelta_cumsum:
        patterns = [
            "does not support .* operations",
            ".* is not supported for object dtype",
            "is not implemented for this dtype",
        ]
        msg = "|".join(patterns)
        with pytest.raises(TypeError, match=msg):
            gb[column].transform(op)
        with pytest.raises(TypeError, match=msg):
            getattr(gb[column], op)()
        return

    expected = gb[column].apply(targop)
    expected.name = column
    if column in ["string_missing", "string"]:
        expected = expected.fillna(np.nan, downcast=False)

    via_transform = gb[column].transform(op, *args)
    tm.assert_series_equal(expected, via_transform)

    via_method = getattr(gb[column], op)(*args)
    tm.assert_series_equal(expected, via_method)


def test_transform_with_non_scalar_group():
Expand Down
11 changes: 7 additions & 4 deletions pandas/tests/indexes/base_class/test_indexing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._libs import index as libindex

import pandas as pd
from pandas import (
Index,
Expand Down Expand Up @@ -40,14 +42,15 @@ def test_get_indexer_non_unique_dtype_mismatch(self):

class TestGetLoc:
@pytest.mark.slow # to_flat_index takes a while
def test_get_loc_tuple_monotonic_above_size_cutoff(self):
def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch):
# Go through the libindex path for which using
# _bin_search vs ndarray.searchsorted makes a difference

lev = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
dti = pd.date_range("2016-01-01", periods=100)
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100)
lev = list("ABCD")
dti = pd.date_range("2016-01-01", periods=10)

mi = pd.MultiIndex.from_product([lev, range(10**3), dti])
mi = pd.MultiIndex.from_product([lev, range(5), dti])
oidx = mi.to_flat_index()

loc = len(oidx) // 2
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexing/multiindex/test_indexing_slow.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@

@pytest.fixture
def m():
return 50
return 5


@pytest.fixture
def n():
return 1000
return 100


@pytest.fixture
Expand Down
52 changes: 31 additions & 21 deletions pandas/tests/plotting/test_boxplot_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,20 @@ def test_stacked_boxplot_set_axis(self):
)

@pytest.mark.slow
def test_boxplot_legacy1(self):
@pytest.mark.parametrize(
"kwargs, warn",
[
[{"return_type": "dict"}, None],
[{"column": ["one", "two"]}, None],
[{"column": ["one", "two"], "by": "indic"}, UserWarning],
[{"column": ["one"], "by": ["indic", "indic2"]}, None],
[{"by": "indic"}, UserWarning],
[{"by": ["indic", "indic2"]}, UserWarning],
[{"notch": 1}, None],
[{"by": "indic", "notch": 1}, UserWarning],
],
)
def test_boxplot_legacy1(self, kwargs, warn):
df = DataFrame(
np.random.randn(6, 4),
index=list(string.ascii_letters[:6]),
Expand All @@ -60,20 +73,13 @@ def test_boxplot_legacy1(self):
df["indic"] = ["foo", "bar"] * 3
df["indic2"] = ["foo", "bar", "foo"] * 2

_check_plot_works(df.boxplot, return_type="dict")
_check_plot_works(df.boxplot, column=["one", "two"], return_type="dict")
# _check_plot_works adds an ax so catch warning. see GH #13188
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
_check_plot_works(df.boxplot, column=["one", "two"], by="indic")
_check_plot_works(df.boxplot, column="one", by=["indic", "indic2"])
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
_check_plot_works(df.boxplot, by="indic")
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
_check_plot_works(df.boxplot, by=["indic", "indic2"])
_check_plot_works(plotting._core.boxplot, data=df["one"], return_type="dict")
_check_plot_works(df.boxplot, notch=1, return_type="dict")
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
_check_plot_works(df.boxplot, by="indic", notch=1)
# _check_plot_works can add an ax so catch warning. see GH #13188
with tm.assert_produces_warning(warn, check_stacklevel=False):
_check_plot_works(df.boxplot, **kwargs)

def test_boxplot_legacy1_series(self):
    # Series input goes through plotting._core.boxplot directly rather than
    # through DataFrame.boxplot.
    _check_plot_works(
        plotting._core.boxplot,
        data=Series(np.random.randn(6)),
        return_type="dict",
    )

def test_boxplot_legacy2(self):
df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"])
Expand Down Expand Up @@ -347,17 +353,21 @@ def test_boxplot_legacy2(self):
axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
_check_axes_shape(axes, axes_num=1, layout=(1, 1))

def test_boxplot_legacy3(self):
@pytest.mark.parametrize(
"subplots, warn, axes_num, layout",
[[True, UserWarning, 3, (2, 2)], [False, None, 1, (1, 1)]],
)
def test_boxplot_legacy3(self, subplots, warn, axes_num, layout):
tuples = zip(string.ascii_letters[:10], range(10))
df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples))
msg = "DataFrame.groupby with axis=1 is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
grouped = df.unstack(level=1).groupby(level=0, axis=1)
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
axes = _check_plot_works(grouped.boxplot, return_type="axes")
_check_axes_shape(list(axes.values), axes_num=3, layout=(2, 2))
axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes")
_check_axes_shape(axes, axes_num=1, layout=(1, 1))
with tm.assert_produces_warning(warn, check_stacklevel=False):
axes = _check_plot_works(
grouped.boxplot, subplots=subplots, return_type="axes"
)
_check_axes_shape(axes, axes_num=axes_num, layout=layout)

def test_grouped_plot_fignums(self):
n = 10
Expand Down
Loading