Skip to content

Commit 9de2a19

Browse files
authored
REF: Add tests.groupby.methods (#55312)
* REF: Add tests.groupby.methods * Merge cleanup * Refactor * Refactor * Show value of ymin * fixup * Revert * Revert
1 parent ae177e8 commit 9de2a19

19 files changed

+1567
-1566
lines changed

pandas/tests/groupby/methods/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import numpy as np
2+
3+
from pandas import (
4+
DataFrame,
5+
Index,
6+
Series,
7+
)
8+
import pandas._testing as tm
9+
10+
11+
def test_corrwith_with_1_axis():
12+
# GH 47723
13+
df = DataFrame({"a": [1, 1, 2], "b": [3, 7, 4]})
14+
gb = df.groupby("a")
15+
16+
msg = "DataFrameGroupBy.corrwith with axis=1 is deprecated"
17+
with tm.assert_produces_warning(FutureWarning, match=msg):
18+
result = gb.corrwith(df, axis=1)
19+
index = Index(
20+
data=[(1, 0), (1, 1), (1, 2), (2, 2), (2, 0), (2, 1)],
21+
name=("a", None),
22+
)
23+
expected = Series([np.nan] * 6, index=index)
24+
tm.assert_series_equal(result, expected)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas as pd
5+
from pandas import (
6+
DataFrame,
7+
Index,
8+
MultiIndex,
9+
Timestamp,
10+
)
11+
import pandas._testing as tm
12+
13+
14+
def test_apply_describe_bug(mframe):
    """Smoke test: ``describe`` on a frame grouped by level must not raise."""
    # No assertion on the values -- completing the call is the whole check.
    mframe.groupby(level="first").describe()
17+
18+
19+
def test_series_describe_multikey():
    """Compare ``describe`` columns with the matching groupby reductions.

    The series is grouped by two callables (year and month of each index
    label); the ``mean``, ``std`` and ``min`` columns of ``describe`` must
    equal the corresponding direct reductions on the same groupby object.
    """
    # Built explicitly rather than through ``tm.makeTimeSeries`` so the test
    # does not depend on a private testing helper. 30 business days starting
    # 2000-01-03 span January and February, giving two (year, month) groups.
    ts = pd.Series(
        np.arange(30, dtype=np.float64),
        index=pd.date_range("2000-01-03", periods=30, freq="B"),
    )
    grouped = ts.groupby([lambda x: x.year, lambda x: x.month])
    result = grouped.describe()
    for stat in ("mean", "std", "min"):
        # check_names=False: only the values and index are compared, not
        # the Series names.
        tm.assert_series_equal(
            result[stat], getattr(grouped, stat)(), check_names=False
        )
26+
27+
28+
def test_series_describe_single():
29+
ts = tm.makeTimeSeries()
30+
grouped = ts.groupby(lambda x: x.month)
31+
result = grouped.apply(lambda x: x.describe())
32+
expected = grouped.describe().stack(future_stack=True)
33+
tm.assert_series_equal(result, expected)
34+
35+
36+
@pytest.mark.parametrize("keys", ["key1", ["key1", "key2"]])
def test_series_describe_as_index(as_index, keys):
    """Check ``describe`` on a selected column for either ``as_index`` value."""
    # GH#49256
    df = DataFrame(
        {
            "key1": ["one", "two", "two", "three", "two"],
            "key2": ["one", "two", "two", "three", "two"],
            "foo2": [1, 2, 4, 4, 6],
        }
    )
    result = df.groupby(keys, as_index=as_index)["foo2"].describe()

    statistics = {
        "count": [1.0, 1.0, 3.0],
        "mean": [1.0, 4.0, 4.0],
        "std": [np.nan, np.nan, 2.0],
        "min": [1.0, 4.0, 2.0],
        "25%": [1.0, 4.0, 3.0],
        "50%": [1.0, 4.0, 4.0],
        "75%": [1.0, 4.0, 5.0],
        "max": [1.0, 4.0, 6.0],
    }
    expected = DataFrame({"key1": ["one", "three", "two"], **statistics})
    # Only the two-element key list has length 2; the bare string "key1"
    # has length 4, so it skips this branch.
    if len(keys) == 2:
        expected.insert(1, "key2", expected["key1"])
    if as_index:
        expected = expected.set_index(keys)
    tm.assert_frame_equal(result, expected)
66+
67+
68+
def test_frame_describe_multikey(tsframe):
    """Check frame ``describe`` for callable groupers and an ``axis=1`` grouping."""
    grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])
    result = grouped.describe()

    # Build the expected frame column by column from the per-column
    # ``describe`` output of the same groupby object.
    per_column = []
    for col in tsframe:
        described = grouped[col].describe()
        n_stats = len(described.columns)
        # GH 17464 - Remove duplicate MultiIndex levels
        columns = MultiIndex(
            levels=[[col], described.columns],
            codes=[[0] * n_stats, range(n_stats)],
        )
        per_column.append(
            DataFrame(described.values, columns=columns, index=described.index)
        )
    expected = pd.concat(per_column, axis=1)
    tm.assert_frame_equal(result, expected)

    msg = "DataFrame.groupby with axis=1 is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        groupedT = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1)
    result = groupedT.describe()

    expected = tsframe.describe().T
    # reverting the change from https://github.com/pandas-dev/pandas/pull/35441/
    n_rows = len(expected.index)
    expected.index = MultiIndex(
        levels=[[0, 1], expected.index],
        codes=[[0, 0, 1, 1], range(n_rows)],
    )
    tm.assert_frame_equal(result, expected)
95+
96+
97+
def test_frame_describe_tupleindex():
    """Grouping on a column of tuples must make ``describe`` raise ValueError."""
    # GH 14848 - regression from 0.19.0 to 0.19.1
    base = [1, 2, 3, 4, 5]
    df1 = DataFrame(
        {
            "x": base * 3,
            "y": [10 * v for v in base] * 3,
            "z": [100 * v for v in base] * 3,
        }
    )
    df1["k"] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5
    df2 = df1.rename(columns={"k": "key"})

    msg = "Names should be list-like for a MultiIndex"
    # The same error is expected whatever the tuple column is called.
    for frame, key in ((df1, "k"), (df2, "key")):
        with pytest.raises(ValueError, match=msg):
            frame.groupby(key).describe()
113+
114+
115+
def test_frame_describe_unstacked_format():
    """Compare grouped-column ``describe`` with per-group ``describe`` rows."""
    # GH 4792
    stamps = [
        Timestamp("2011-01-06 10:59:05", tz=None),
        Timestamp("2011-01-06 12:43:33", tz=None),
        Timestamp("2011-01-06 12:54:09", tz=None),
    ]
    prices = dict(zip(stamps, [24990, 25499, 25499]))
    volumes = dict(zip(stamps, [1500000000, 5000000000, 100000000]))
    df = DataFrame({"PRICE": prices, "VOLUME": volumes})
    result = df.groupby("PRICE").VOLUME.describe()

    # One expected row per distinct price, computed by filtering the frame
    # manually and describing the remaining volumes.
    price_levels = [24990, 25499]
    rows = [
        df[df.PRICE == price].VOLUME.describe().values.tolist()
        for price in price_levels
    ]
    expected = DataFrame(
        rows,
        index=Index(price_levels, name="PRICE"),
        columns=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
    )
    tm.assert_frame_equal(result, expected)
139+
140+
141+
@pytest.mark.filterwarnings(
    "ignore:"
    "indexing past lexsort depth may impact performance:"
    "pandas.errors.PerformanceWarning"
)
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]])
def test_describe_with_duplicate_output_column_names(as_index, keys):
    """Check ``describe`` when two described columns share the label ``"b"``."""
    # GH 35314
    df = DataFrame(
        {
            "a1": [99, 99, 99, 88, 88, 88],
            "a2": [99, 99, 99, 88, 88, 88],
            "b": [1, 2, 3, 4, 5, 6],
            "c": [10, 20, 30, 40, 50, 60],
        },
        columns=["a1", "a2", "b", "b"],
        copy=False,
    )
    if keys == ["a1"]:
        df = df.drop(columns="a2")

    # (statistic, value for key 88, value for key 99)
    stat_rows = [
        ("count", 3.0, 3.0),
        ("mean", 5.0, 2.0),
        ("std", 1.0, 1.0),
        ("min", 4.0, 1.0),
        ("25%", 4.5, 1.5),
        ("50%", 5.0, 2.0),
        ("75%", 5.5, 2.5),
        ("max", 6.0, 3.0),
    ]
    # The same eight statistics are listed once per "b" column.
    records = [("b", *row) for row in stat_rows] * 2
    expected = DataFrame.from_records(records).set_index([0, 1]).T
    expected.columns.names = [None, None]

    if len(keys) != 2:
        expected.index = Index([88, 99], name="a1")
    else:
        expected.index = MultiIndex(
            levels=[[88, 99], [88, 99]], codes=[[0, 1], [0, 1]], names=["a1", "a2"]
        )
    if not as_index:
        expected = expected.reset_index()

    result = df.groupby(keys, as_index=as_index).describe()

    tm.assert_frame_equal(result, expected)
201+
202+
203+
def test_describe_duplicate_columns():
    """Check ``describe`` on a frame whose column label ``0`` occurs twice."""
    # GH#50806
    df = DataFrame([[0, 1, 2, 3]])
    df.columns = [0, 1, 2, 0]
    result = df.groupby(df[1]).describe(percentiles=[])

    stat_names = ["count", "mean", "std", "min", "50%", "max"]
    n_stats = len(stat_names)

    # One single-row block of statistics per described column.
    blocks = []
    for val in (0.0, 2.0, 3.0):
        row = [1.0, val, np.nan, val, val, val]
        blocks.append(DataFrame([row], index=[1], columns=stat_names))
    expected = pd.concat(blocks, axis=1)

    # Top-level codes 0, 1, 0 select labels 0, 2, 0 -- the duplicated
    # label 0 is used for the first and last block.
    expected.columns = MultiIndex(
        levels=[[0, 2], stat_names],
        codes=[
            [0] * n_stats + [1] * n_stats + [0] * n_stats,
            list(range(n_stats)) * 3,
        ],
    )
    expected.index.names = [1]
    tm.assert_frame_equal(result, expected)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas import (
5+
DataFrame,
6+
Index,
7+
Series,
8+
)
9+
import pandas._testing as tm
10+
11+
12+
@pytest.mark.parametrize(
    "in_vals, out_vals",
    [
        # Basics: strictly increasing (T), strictly decreasing (F),
        # abs val increasing (F), non-strictly increasing (T)
        ([1, 2, 5, 3, 2, 0, 4, 5, -6, 1, 1], [True, False, False, True]),
        # Test with inf vals
        (
            [1, 2.1, np.inf, 3, 2, np.inf, -np.inf, 5, 11, 1, -np.inf],
            [True, False, True, False],
        ),
        # Test with nan vals; should always be False
        (
            [1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan],
            [False, False, False, False],
        ),
    ],
)
def test_is_monotonic_increasing(in_vals, out_vals):
    """Check ``is_monotonic_increasing`` per group against expected flags."""
    # GH 17015
    df = DataFrame(
        {
            "A": [str(i) for i in range(1, 12)],
            # Groups "a", "b", "c" hold three rows each, "d" holds two.
            "B": list("aaabbbcccdd"),
            "C": in_vals,
        }
    )
    result = df.groupby("B").C.is_monotonic_increasing

    group_index = Index(list("abcd"), name="B")
    expected = Series(index=group_index, data=out_vals, name="C")
    tm.assert_series_equal(result, expected)

    # Also check result equal to manually taking x.is_monotonic_increasing.
    manual = df.groupby(["B"]).C.apply(lambda x: x.is_monotonic_increasing)
    tm.assert_series_equal(result, manual)
42+
43+
# Also check result equal to manually taking x.is_monotonic_increasing.
44+
expected = df.groupby(["B"]).C.apply(lambda x: x.is_monotonic_increasing)
45+
tm.assert_series_equal(result, expected)
46+
47+
48+
@pytest.mark.parametrize(
    "in_vals, out_vals",
    [
        # Basics: strictly decreasing (T), strictly increasing (F),
        # abs val decreasing (F), non-strictly decreasing (T)
        ([10, 9, 7, 3, 4, 5, -3, 2, 0, 1, 1], [True, False, False, True]),
        # Test with inf vals
        (
            [np.inf, 1, -np.inf, np.inf, 2, -3, -np.inf, 5, -3, -np.inf, -np.inf],
            [True, True, False, True],
        ),
        # Test with nan vals; should always be False
        (
            [1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan],
            [False, False, False, False],
        ),
    ],
)
def test_is_monotonic_decreasing(in_vals, out_vals):
    """Check ``is_monotonic_decreasing`` per group against expected flags.

    Parameters
    ----------
    in_vals : list
        Eleven values for column "C"; rows fall into groups "a", "b", "c"
        (three rows each) and "d" (two rows).
    out_vals : list of bool
        Expected flag for groups "a" through "d", in that order.
    """
    # GH 17015
    source_dict = {
        "A": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"],
        "B": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "d", "d"],
        "C": in_vals,
    }

    df = DataFrame(source_dict)
    result = df.groupby("B").C.is_monotonic_decreasing
    index = Index(list("abcd"), name="B")
    expected = Series(index=index, data=out_vals, name="C")
    tm.assert_series_equal(result, expected)

    # Also check result equal to manually taking x.is_monotonic_decreasing,
    # mirroring the cross-check in test_is_monotonic_increasing.
    expected = df.groupby(["B"]).C.apply(lambda x: x.is_monotonic_decreasing)
    tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)