diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 397500f64787f..5caf3d2ac1d8b 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -697,6 +697,36 @@ def test_groupby_cum_skipna(op, skipna, input, exp): tm.assert_series_equal(expected, result) +@pytest.fixture +def frame(): + floating = Series(np.random.randn(10)) + floating_missing = floating.copy() + floating_missing.iloc[2:7] = np.nan + strings = list("abcde") * 2 + strings_missing = strings[:] + strings_missing[5] = np.nan + + df = DataFrame( + { + "float": floating, + "float_missing": floating_missing, + "int": [1, 1, 1, 1, 2] * 2, + "datetime": date_range("1990-1-1", periods=10), + "timedelta": pd.timedelta_range(1, freq="s", periods=10), + "string": strings, + "string_missing": strings_missing, + "cat": Categorical(strings), + }, + ) + return df + + +@pytest.fixture +def frame_mi(frame): + frame.index = MultiIndex.from_product([range(5), range(2)]) + return frame + + @pytest.mark.slow @pytest.mark.parametrize( "op, args, targop", @@ -707,100 +737,110 @@ def test_groupby_cum_skipna(op, skipna, input, exp): ("shift", (1,), lambda x: x.shift()), ], ) -def test_cython_transform_frame(op, args, targop): - s = Series(np.random.randn(1000)) - s_missing = s.copy() - s_missing.iloc[2:10] = np.nan - labels = np.random.randint(0, 50, size=1000).astype(float) - strings = list("qwertyuiopasdfghjklz") - strings_missing = strings[:] - strings_missing[5] = np.nan - df = DataFrame( - { - "float": s, - "float_missing": s_missing, - "int": [1, 1, 1, 1, 2] * 200, - "datetime": date_range("1990-1-1", periods=1000), - "timedelta": pd.timedelta_range(1, freq="s", periods=1000), - "string": strings * 50, - "string_missing": strings_missing * 50, - }, - columns=[ - "float", - "float_missing", - "int", - "datetime", - "timedelta", - "string", - "string_missing", - ], - ) - df["cat"] = df["string"].astype("category") - - df2 = df.copy() - df2.index = MultiIndex.from_product([range(100), range(10)]) - - # DataFrame - Single and MultiIndex, - # group by values, index level, columns - for df in [df, df2]: - for gb_target in [ - {"by": labels}, - {"level": 0}, - {"by": "string"}, - ]: # {"by": 'string_missing'}]: - # {"by": ['int','string']}]: - # TODO: remove or enable commented-out code - - gb = df.groupby(group_keys=False, **gb_target) - - if op != "shift" and "int" not in gb_target: - # numeric apply fastpath promotes dtype so have - # to apply separately and concat - i = gb[["int"]].apply(targop) - f = gb[["float", "float_missing"]].apply(targop) - expected = concat([f, i], axis=1) - else: - expected = gb.apply(targop) - - expected = expected.sort_index(axis=1) - if op == "shift": - expected["string_missing"] = expected["string_missing"].fillna( - np.nan, downcast=False - ) - expected["string"] = expected["string"].fillna(np.nan, downcast=False) - - result = gb[expected.columns].transform(op, *args).sort_index(axis=1) - tm.assert_frame_equal(result, expected) - result = getattr(gb[expected.columns], op)(*args).sort_index(axis=1) - tm.assert_frame_equal(result, expected) - # individual columns - for c in df: - if ( - c not in ["float", "int", "float_missing"] - and op != "shift" - and not (c == "timedelta" and op == "cumsum") - ): - msg = "|".join( - [ - "does not support .* operations", - ".* is not supported for object dtype", - "is not implemented for this dtype", - ] - ) - with pytest.raises(TypeError, match=msg): - gb[c].transform(op) - with pytest.raises(TypeError, match=msg): - getattr(gb[c], op)() - else: - expected = gb[c].apply(targop) - expected.name = c - if c in ["string_missing", "string"]: - expected = expected.fillna(np.nan, downcast=False) - - res = gb[c].transform(op, *args) - tm.assert_series_equal(expected, res) - res2 = getattr(gb[c], op)(*args) - tm.assert_series_equal(expected, res2) +@pytest.mark.parametrize("df_fix", ["frame", "frame_mi"]) +@pytest.mark.parametrize( + "gb_target", + [ + {"by": np.random.randint(0, 50, size=10).astype(float)}, + {"level": 0}, + {"by": "string"}, + # {"by": 'string_missing'}]: + # {"by": ['int','string']}]: + # TODO: remove or enable commented-out code + ], +) +def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target): + df = request.getfixturevalue(df_fix) + gb = df.groupby(group_keys=False, **gb_target) + + if op != "shift" and "int" not in gb_target: + # numeric apply fastpath promotes dtype so have + # to apply separately and concat + i = gb[["int"]].apply(targop) + f = gb[["float", "float_missing"]].apply(targop) + expected = concat([f, i], axis=1) + else: + expected = gb.apply(targop) + + expected = expected.sort_index(axis=1) + if op == "shift": + expected["string_missing"] = expected["string_missing"].fillna( + np.nan, downcast=False + ) + expected["string"] = expected["string"].fillna(np.nan, downcast=False) + + result = gb[expected.columns].transform(op, *args).sort_index(axis=1) + tm.assert_frame_equal(result, expected) + result = getattr(gb[expected.columns], op)(*args).sort_index(axis=1) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.slow +@pytest.mark.parametrize( + "op, args, targop", + [ + ("cumprod", (), lambda x: x.cumprod()), + ("cumsum", (), lambda x: x.cumsum()), + ("shift", (-1,), lambda x: x.shift(-1)), + ("shift", (1,), lambda x: x.shift()), + ], +) +@pytest.mark.parametrize("df_fix", ["frame", "frame_mi"]) +@pytest.mark.parametrize( + "gb_target", + [ + {"by": np.random.randint(0, 50, size=10).astype(float)}, + {"level": 0}, + {"by": "string"}, + # {"by": 'string_missing'}]: + # {"by": ['int','string']}]: + # TODO: remove or enable commented-out code + ], +) +@pytest.mark.parametrize( + "column", + [ + "float", + "float_missing", + "int", + "datetime", + "timedelta", + "string", + "string_missing", + ], +) +def test_cython_transform_frame_column( + request, op, args, targop, df_fix, gb_target, column +): + df = request.getfixturevalue(df_fix) + gb = df.groupby(group_keys=False, **gb_target) + c = column + if ( + c not in ["float", "int", "float_missing"] + and op != "shift" + and not (c == "timedelta" and op == "cumsum") + ): + msg = "|".join( + [ + "does not support .* operations", + ".* is not supported for object dtype", + "is not implemented for this dtype", + ] + ) + with pytest.raises(TypeError, match=msg): + gb[c].transform(op) + with pytest.raises(TypeError, match=msg): + getattr(gb[c], op)() + else: + expected = gb[c].apply(targop) + expected.name = c + if c in ["string_missing", "string"]: + expected = expected.fillna(np.nan, downcast=False) + + res = gb[c].transform(op, *args) + tm.assert_series_equal(expected, res) + res2 = getattr(gb[c], op)(*args) + tm.assert_series_equal(expected, res2) def test_transform_with_non_scalar_group(): diff --git a/pandas/tests/indexes/base_class/test_indexing.py b/pandas/tests/indexes/base_class/test_indexing.py index 070fec47b90e6..5b8b014954314 100644 --- a/pandas/tests/indexes/base_class/test_indexing.py +++ b/pandas/tests/indexes/base_class/test_indexing.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._libs import index as libindex + import pandas as pd from pandas import ( Index, @@ -40,14 +42,15 @@ def test_get_indexer_non_unique_dtype_mismatch(self): class TestGetLoc: @pytest.mark.slow # to_flat_index takes a while - def test_get_loc_tuple_monotonic_above_size_cutoff(self): + def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch): # Go through the libindex path for which using # _bin_search vs ndarray.searchsorted makes a difference - lev = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") - dti = pd.date_range("2016-01-01", periods=100) + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100) + lev = list("ABCD") + dti = pd.date_range("2016-01-01", periods=10) - mi = pd.MultiIndex.from_product([lev, range(10**3), dti]) + mi = pd.MultiIndex.from_product([lev, range(5), dti]) oidx = mi.to_flat_index() loc = len(oidx) // 2 diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py index de36d52921622..507ef63344a26 100644 --- a/pandas/tests/indexing/multiindex/test_indexing_slow.py +++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py @@ -11,12 +11,12 @@ @pytest.fixture def m(): - return 50 + return 5 @pytest.fixture def n(): - return 1000 + return 100 @pytest.fixture diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 81947706f3fe1..075243603e9cd 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -51,7 +51,20 @@ def test_stacked_boxplot_set_axis(self): ) @pytest.mark.slow - def test_boxplot_legacy1(self): + @pytest.mark.parametrize( + "kwargs, warn", + [ + [{"return_type": "dict"}, None], + [{"column": ["one", "two"]}, None], + [{"column": ["one", "two"], "by": "indic"}, UserWarning], + [{"column": ["one"], "by": ["indic", "indic2"]}, None], + [{"by": "indic"}, UserWarning], + [{"by": ["indic", "indic2"]}, UserWarning], + [{"notch": 1}, None], + [{"by": "indic", "notch": 1}, UserWarning], + ], + ) + def test_boxplot_legacy1(self, kwargs, warn): df = DataFrame( np.random.randn(6, 4), index=list(string.ascii_letters[:6]), @@ -60,20 +73,13 @@ def test_boxplot_legacy1(self): df["indic"] = ["foo", "bar"] * 3 df["indic2"] = ["foo", "bar", "foo"] * 2 - _check_plot_works(df.boxplot, return_type="dict") - _check_plot_works(df.boxplot, column=["one", "two"], return_type="dict") - # _check_plot_works adds an ax so catch warning. see GH #13188 - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): - _check_plot_works(df.boxplot, column=["one", "two"], by="indic") - _check_plot_works(df.boxplot, column="one", by=["indic", "indic2"]) - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): - _check_plot_works(df.boxplot, by="indic") - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): - _check_plot_works(df.boxplot, by=["indic", "indic2"]) - _check_plot_works(plotting._core.boxplot, data=df["one"], return_type="dict") - _check_plot_works(df.boxplot, notch=1, return_type="dict") - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): - _check_plot_works(df.boxplot, by="indic", notch=1) + # _check_plot_works can add an ax so catch warning. see GH #13188 + with tm.assert_produces_warning(warn, check_stacklevel=False): + _check_plot_works(df.boxplot, **kwargs) + + def test_boxplot_legacy1_series(self): + ser = Series(np.random.randn(6)) + _check_plot_works(plotting._core.boxplot, data=ser, return_type="dict") def test_boxplot_legacy2(self): df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"]) @@ -347,17 +353,21 @@ def test_boxplot_legacy2(self): axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") _check_axes_shape(axes, axes_num=1, layout=(1, 1)) - def test_boxplot_legacy3(self): + @pytest.mark.parametrize( + "subplots, warn, axes_num, layout", + [[True, UserWarning, 3, (2, 2)], [False, None, 1, (1, 1)]], + ) + def test_boxplot_legacy3(self, subplots, warn, axes_num, layout): tuples = zip(string.ascii_letters[:10], range(10)) df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) msg = "DataFrame.groupby with axis=1 is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): grouped = df.unstack(level=1).groupby(level=0, axis=1) - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): - axes = _check_plot_works(grouped.boxplot, return_type="axes") - _check_axes_shape(list(axes.values), axes_num=3, layout=(2, 2)) - axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") - _check_axes_shape(axes, axes_num=1, layout=(1, 1)) + with tm.assert_produces_warning(warn, check_stacklevel=False): + axes = _check_plot_works( + grouped.boxplot, subplots=subplots, return_type="axes" + ) + _check_axes_shape(axes, axes_num=axes_num, layout=layout) def test_grouped_plot_fignums(self): n = 10 diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index a033baa1a5f52..edcb1ab07d7eb 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -682,59 +682,74 @@ def test_grouped_hist_legacy2(self): tm.close() @pytest.mark.slow - def test_grouped_hist_layout(self, hist_df): + @pytest.mark.parametrize( + "msg, plot_col, by_col, layout", + [ + [ + "Layout of 1x1 must be larger than required size 2", + "weight", + "gender", + (1, 1), + ], + [ + "Layout of 1x3 must be larger than required size 4", + "height", + "category", + (1, 3), + ], + [ + "At least one dimension of layout must be positive", + "height", + "category", + (-1, -1), + ], + ], + ) + def test_grouped_hist_layout_error(self, hist_df, msg, plot_col, by_col, layout): df = hist_df - msg = "Layout of 1x1 must be larger than required size 2" - with pytest.raises(ValueError, match=msg): - df.hist(column="weight", by=df.gender, layout=(1, 1)) - - msg = "Layout of 1x3 must be larger than required size 4" with pytest.raises(ValueError, match=msg): - df.hist(column="height", by=df.category, layout=(1, 3)) - - msg = "At least one dimension of layout must be positive" - with pytest.raises(ValueError, match=msg): - df.hist(column="height", by=df.category, layout=(-1, -1)) + df.hist(column=plot_col, by=getattr(df, by_col), layout=layout) + @pytest.mark.slow + def test_grouped_hist_layout_warning(self, hist_df): + df = hist_df with tm.assert_produces_warning(UserWarning, check_stacklevel=False): axes = _check_plot_works( df.hist, column="height", by=df.gender, layout=(2, 1) ) _check_axes_shape(axes, axes_num=2, layout=(2, 1)) - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): - axes = _check_plot_works( - df.hist, column="height", by=df.gender, layout=(2, -1) - ) - _check_axes_shape(axes, axes_num=2, layout=(2, 1)) - - axes = df.hist(column="height", by=df.category, layout=(4, 1)) - _check_axes_shape(axes, axes_num=4, layout=(4, 1)) - - axes = df.hist(column="height", by=df.category, layout=(-1, 1)) - _check_axes_shape(axes, axes_num=4, layout=(4, 1)) - - axes = df.hist(column="height", by=df.category, layout=(4, 2), figsize=(12, 8)) - _check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 8)) - tm.close() + @pytest.mark.slow + @pytest.mark.parametrize( + "layout, check_layout, figsize", + [[(4, 1), (4, 1), None], [(-1, 1), (4, 1), None], [(4, 2), (4, 2), (12, 8)]], + ) + def test_grouped_hist_layout_figsize(self, hist_df, layout, check_layout, figsize): + df = hist_df + axes = df.hist(column="height", by=df.category, layout=layout, figsize=figsize) + _check_axes_shape(axes, axes_num=4, layout=check_layout, figsize=figsize) + @pytest.mark.slow + @pytest.mark.parametrize("kwargs", [{}, {"column": "height", "layout": (2, 2)}]) + def test_grouped_hist_layout_by_warning(self, hist_df, kwargs): + df = hist_df # GH 6769 with tm.assert_produces_warning(UserWarning, check_stacklevel=False): - axes = _check_plot_works( - df.hist, column="height", by="classroom", layout=(2, 2) - ) + axes = _check_plot_works(df.hist, by="classroom", **kwargs) _check_axes_shape(axes, axes_num=3, layout=(2, 2)) - # without column - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): - axes = _check_plot_works(df.hist, by="classroom") - _check_axes_shape(axes, axes_num=3, layout=(2, 2)) - - axes = df.hist(by="gender", layout=(3, 5)) - _check_axes_shape(axes, axes_num=2, layout=(3, 5)) - - axes = df.hist(column=["height", "weight", "category"]) - _check_axes_shape(axes, axes_num=3, layout=(2, 2)) + @pytest.mark.slow + @pytest.mark.parametrize( + "kwargs, axes_num, layout", + [ + [{"by": "gender", "layout": (3, 5)}, 2, (3, 5)], + [{"column": ["height", "weight", "category"]}, 3, (2, 2)], + ], + ) + def test_grouped_hist_layout_axes(self, hist_df, kwargs, axes_num, layout): + df = hist_df + axes = df.hist(**kwargs) + _check_axes_shape(axes, axes_num=axes_num, layout=layout) def test_grouped_hist_multiple_axes(self, hist_df): # GH 6970, GH 7069 diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index e8797266fcbbe..f6b50aeb3139d 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -23,6 +23,7 @@ ) mpl = pytest.importorskip("matplotlib") +cm = pytest.importorskip("matplotlib.cm") @td.skip_if_mpl @@ -138,9 +139,7 @@ def test_scatter_matrix_axis(self, pass_axis): _check_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) @pytest.mark.slow - def test_andrews_curves(self, iris): - from matplotlib import cm - + def test_andrews_curves_no_warning(self, iris): from pandas.plotting import andrews_curves df = iris @@ -148,58 +147,74 @@ def test_andrews_curves(self, iris): with tm.assert_produces_warning(None): _check_plot_works(andrews_curves, frame=df, class_column="Name") - rgba = ("#556270", "#4ECDC4", "#C7F464") - ax = _check_plot_works( - andrews_curves, frame=df, class_column="Name", color=rgba - ) - _check_colors(ax.get_lines()[:10], linecolors=rgba, mapping=df["Name"][:10]) - - cnames = ["dodgerblue", "aquamarine", "seagreen"] - ax = _check_plot_works( - andrews_curves, frame=df, class_column="Name", color=cnames - ) - _check_colors(ax.get_lines()[:10], linecolors=cnames, mapping=df["Name"][:10]) + @pytest.mark.slow + @pytest.mark.parametrize( + "linecolors", + [ + ("#556270", "#4ECDC4", "#C7F464"), + ["dodgerblue", "aquamarine", "seagreen"], + ], + ) + @pytest.mark.parametrize( + "df", + [ + "iris", + DataFrame( + { + "A": np.random.rand(10), + "B": np.random.rand(10), + "C": np.random.rand(10), + "Name": ["A"] * 10, + } + ), + ], + ) + def test_andrews_curves_linecolors(self, request, df, linecolors): + from pandas.plotting import andrews_curves + if isinstance(df, str): + df = request.getfixturevalue(df) ax = _check_plot_works( - andrews_curves, frame=df, class_column="Name", colormap=cm.jet - ) - cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())] - _check_colors(ax.get_lines()[:10], linecolors=cmaps, mapping=df["Name"][:10]) - - length = 10 - df = DataFrame( - { - "A": np.random.rand(length), - "B": np.random.rand(length), - "C": np.random.rand(length), - "Name": ["A"] * length, - } + andrews_curves, frame=df, class_column="Name", color=linecolors ) - - _check_plot_works(andrews_curves, frame=df, class_column="Name") - - rgba = ("#556270", "#4ECDC4", "#C7F464") - ax = _check_plot_works( - andrews_curves, frame=df, class_column="Name", color=rgba + _check_colors( + ax.get_lines()[:10], linecolors=linecolors, mapping=df["Name"][:10] ) - _check_colors(ax.get_lines()[:10], linecolors=rgba, mapping=df["Name"][:10]) - cnames = ["dodgerblue", "aquamarine", "seagreen"] - ax = _check_plot_works( - andrews_curves, frame=df, class_column="Name", color=cnames - ) - _check_colors(ax.get_lines()[:10], linecolors=cnames, mapping=df["Name"][:10]) + @pytest.mark.slow + @pytest.mark.parametrize( + "df", + [ + "iris", + DataFrame( + { + "A": np.random.rand(10), + "B": np.random.rand(10), + "C": np.random.rand(10), + "Name": ["A"] * 10, + } + ), + ], + ) + def test_andrews_curves_cmap(self, request, df): + from pandas.plotting import andrews_curves + if isinstance(df, str): + df = request.getfixturevalue(df) + cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())] ax = _check_plot_works( - andrews_curves, frame=df, class_column="Name", colormap=cm.jet + andrews_curves, frame=df, class_column="Name", color=cmaps ) - cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())] _check_colors(ax.get_lines()[:10], linecolors=cmaps, mapping=df["Name"][:10]) + @pytest.mark.slow + def test_andrews_curves_handle(self): + from pandas.plotting import andrews_curves + colors = ["b", "g", "r"] df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3], "C": [1, 2, 3], "Name": colors}) ax = andrews_curves(df, "Name", color=colors) - handles, labels = ax.get_legend_handles_labels() + handles, _ = ax.get_legend_handles_labels() _check_colors(handles, linecolors=colors) @pytest.mark.slow