Skip to content

Commit dc4c474

Browse files
authored
TST: Remove groupby/test_function.py (#56338)
* TST: Finish removal of groupby/test_function.py * Move one more
1 parent 52649ea commit dc4c474

File tree

5 files changed

+134
-98
lines changed

5 files changed

+134
-98
lines changed

pandas/tests/groupby/methods/test_nth.py

+21
Original file line numberDiff line numberDiff line change
@@ -898,3 +898,24 @@ def test_nth_after_selection(selection, dropna):
898898
locs = [0, 2]
899899
expected = df.loc[locs, selection]
900900
tm.assert_equal(result, expected)
901+
902+
903+
@pytest.mark.parametrize(
904+
"data",
905+
[
906+
(
907+
Timestamp("2011-01-15 12:50:28.502376"),
908+
Timestamp("2011-01-20 12:50:28.593448"),
909+
),
910+
(24650000000000001, 24650000000000002),
911+
],
912+
)
913+
def test_groupby_nth_int_like_precision(data):
914+
# GH#6620, GH#9311
915+
df = DataFrame({"a": [1, 1], "b": data})
916+
917+
grouped = df.groupby("a")
918+
result = grouped.nth(0)
919+
expected = DataFrame({"a": 1, "b": [data[0]]})
920+
921+
tm.assert_frame_equal(result, expected)

pandas/tests/groupby/test_all_methods.py

+83
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""
2+
Tests that apply to all groupby operation methods.
3+
4+
The only tests that should appear here are those that use the `groupby_func` fixture.
5+
Even if a test does use that fixture, prefer a more specific test file if one is available
6+
such as:
7+
8+
- test_categorical
9+
- test_groupby_dropna
10+
- test_groupby_subclass
11+
- test_raises
12+
"""
13+
14+
import pytest
15+
16+
import pandas as pd
17+
from pandas import DataFrame
18+
import pandas._testing as tm
19+
from pandas.tests.groupby import get_groupby_method_args
20+
21+
22+
def test_multiindex_group_all_columns_when_empty(groupby_func):
23+
# GH 32464
24+
df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"])
25+
gb = df.groupby(["a", "b", "c"], group_keys=False)
26+
method = getattr(gb, groupby_func)
27+
args = get_groupby_method_args(groupby_func, df)
28+
29+
warn = FutureWarning if groupby_func == "fillna" else None
30+
warn_msg = "DataFrameGroupBy.fillna is deprecated"
31+
with tm.assert_produces_warning(warn, match=warn_msg):
32+
result = method(*args).index
33+
expected = df.index
34+
tm.assert_index_equal(result, expected)
35+
36+
37+
def test_duplicate_columns(request, groupby_func, as_index):
38+
# GH#50806
39+
if groupby_func == "corrwith":
40+
msg = "GH#50845 - corrwith fails when there are duplicate columns"
41+
request.applymarker(pytest.mark.xfail(reason=msg))
42+
df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
43+
args = get_groupby_method_args(groupby_func, df)
44+
gb = df.groupby("a", as_index=as_index)
45+
warn = FutureWarning if groupby_func == "fillna" else None
46+
warn_msg = "DataFrameGroupBy.fillna is deprecated"
47+
with tm.assert_produces_warning(warn, match=warn_msg):
48+
result = getattr(gb, groupby_func)(*args)
49+
50+
expected_df = df.set_axis(["a", "b", "c"], axis=1)
51+
expected_args = get_groupby_method_args(groupby_func, expected_df)
52+
expected_gb = expected_df.groupby("a", as_index=as_index)
53+
warn = FutureWarning if groupby_func == "fillna" else None
54+
warn_msg = "DataFrameGroupBy.fillna is deprecated"
55+
with tm.assert_produces_warning(warn, match=warn_msg):
56+
expected = getattr(expected_gb, groupby_func)(*expected_args)
57+
if groupby_func not in ("size", "ngroup", "cumcount"):
58+
expected = expected.rename(columns={"c": "b"})
59+
tm.assert_equal(result, expected)
60+
61+
62+
@pytest.mark.parametrize(
63+
"idx",
64+
[
65+
pd.Index(["a", "a"], name="foo"),
66+
pd.MultiIndex.from_tuples((("a", "a"), ("a", "a")), names=["foo", "bar"]),
67+
],
68+
)
69+
def test_dup_labels_output_shape(groupby_func, idx):
70+
if groupby_func in {"size", "ngroup", "cumcount"}:
71+
pytest.skip(f"Not applicable for {groupby_func}")
72+
73+
df = DataFrame([[1, 1]], columns=idx)
74+
grp_by = df.groupby([0])
75+
76+
args = get_groupby_method_args(groupby_func, df)
77+
warn = FutureWarning if groupby_func == "fillna" else None
78+
warn_msg = "DataFrameGroupBy.fillna is deprecated"
79+
with tm.assert_produces_warning(warn, match=warn_msg):
80+
result = getattr(grp_by, groupby_func)(*args)
81+
82+
assert result.shape == (1, 2)
83+
tm.assert_index_equal(result.columns, idx)

pandas/tests/groupby/test_groupby.py

-25
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import pandas._testing as tm
3030
from pandas.core.arrays import BooleanArray
3131
import pandas.core.common as com
32-
from pandas.tests.groupby import get_groupby_method_args
3332

3433
pytestmark = pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning")
3534

@@ -2422,30 +2421,6 @@ def test_group_on_empty_multiindex(transformation_func, request):
24222421
tm.assert_equal(result, expected)
24232422

24242423

2425-
@pytest.mark.parametrize(
2426-
"idx",
2427-
[
2428-
Index(["a", "a"], name="foo"),
2429-
MultiIndex.from_tuples((("a", "a"), ("a", "a")), names=["foo", "bar"]),
2430-
],
2431-
)
2432-
def test_dup_labels_output_shape(groupby_func, idx):
2433-
if groupby_func in {"size", "ngroup", "cumcount"}:
2434-
pytest.skip(f"Not applicable for {groupby_func}")
2435-
2436-
df = DataFrame([[1, 1]], columns=idx)
2437-
grp_by = df.groupby([0])
2438-
2439-
args = get_groupby_method_args(groupby_func, df)
2440-
warn = FutureWarning if groupby_func == "fillna" else None
2441-
warn_msg = "DataFrameGroupBy.fillna is deprecated"
2442-
with tm.assert_produces_warning(warn, match=warn_msg):
2443-
result = getattr(grp_by, groupby_func)(*args)
2444-
2445-
assert result.shape == (1, 2)
2446-
tm.assert_index_equal(result.columns, idx)
2447-
2448-
24492424
def test_groupby_crash_on_nunique(axis):
24502425
# Fix following 30253
24512426
dti = date_range("2016-01-01", periods=2, name="foo")

pandas/tests/groupby/test_function.py renamed to pandas/tests/groupby/test_numeric_only.py

-73
Original file line numberDiff line numberDiff line change
@@ -205,39 +205,6 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
205205
tm.assert_index_equal(result.columns, expected_columns)
206206

207207

208-
@pytest.mark.parametrize(
209-
"i",
210-
[
211-
(
212-
Timestamp("2011-01-15 12:50:28.502376"),
213-
Timestamp("2011-01-20 12:50:28.593448"),
214-
),
215-
(24650000000000001, 24650000000000002),
216-
],
217-
)
218-
def test_groupby_non_arithmetic_agg_int_like_precision(i):
219-
# see gh-6620, gh-9311
220-
df = DataFrame([{"a": 1, "b": i[0]}, {"a": 1, "b": i[1]}])
221-
222-
grp_exp = {
223-
"first": {"expected": i[0]},
224-
"last": {"expected": i[1]},
225-
"min": {"expected": i[0]},
226-
"max": {"expected": i[1]},
227-
"nth": {"expected": i[1], "args": [1]},
228-
"count": {"expected": 2},
229-
}
230-
231-
for method, data in grp_exp.items():
232-
if "args" not in data:
233-
data["args"] = []
234-
235-
grouped = df.groupby("a")
236-
res = getattr(grouped, method)(*data["args"])
237-
238-
assert res.iloc[0].b == data["expected"]
239-
240-
241208
@pytest.mark.parametrize("numeric_only", [True, False, None])
242209
def test_axis1_numeric_only(request, groupby_func, numeric_only):
243210
if groupby_func in ("idxmax", "idxmin"):
@@ -543,43 +510,3 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
543510
result = method(*args, numeric_only=True)
544511
expected = method(*args, numeric_only=False)
545512
tm.assert_series_equal(result, expected)
546-
547-
548-
def test_multiindex_group_all_columns_when_empty(groupby_func):
549-
# GH 32464
550-
df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"])
551-
gb = df.groupby(["a", "b", "c"], group_keys=False)
552-
method = getattr(gb, groupby_func)
553-
args = get_groupby_method_args(groupby_func, df)
554-
555-
warn = FutureWarning if groupby_func == "fillna" else None
556-
warn_msg = "DataFrameGroupBy.fillna is deprecated"
557-
with tm.assert_produces_warning(warn, match=warn_msg):
558-
result = method(*args).index
559-
expected = df.index
560-
tm.assert_index_equal(result, expected)
561-
562-
563-
def test_duplicate_columns(request, groupby_func, as_index):
564-
# GH#50806
565-
if groupby_func == "corrwith":
566-
msg = "GH#50845 - corrwith fails when there are duplicate columns"
567-
request.applymarker(pytest.mark.xfail(reason=msg))
568-
df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
569-
args = get_groupby_method_args(groupby_func, df)
570-
gb = df.groupby("a", as_index=as_index)
571-
warn = FutureWarning if groupby_func == "fillna" else None
572-
warn_msg = "DataFrameGroupBy.fillna is deprecated"
573-
with tm.assert_produces_warning(warn, match=warn_msg):
574-
result = getattr(gb, groupby_func)(*args)
575-
576-
expected_df = df.set_axis(["a", "b", "c"], axis=1)
577-
expected_args = get_groupby_method_args(groupby_func, expected_df)
578-
expected_gb = expected_df.groupby("a", as_index=as_index)
579-
warn = FutureWarning if groupby_func == "fillna" else None
580-
warn_msg = "DataFrameGroupBy.fillna is deprecated"
581-
with tm.assert_produces_warning(warn, match=warn_msg):
582-
expected = getattr(expected_gb, groupby_func)(*expected_args)
583-
if groupby_func not in ("size", "ngroup", "cumcount"):
584-
expected = expected.rename(columns={"c": "b"})
585-
tm.assert_equal(result, expected)

pandas/tests/groupby/test_reductions.py

+30
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,36 @@ def test_idxmin_idxmax_returns_int_types(func, values, numeric_only):
235235
tm.assert_frame_equal(result, expected)
236236

237237

238+
@pytest.mark.parametrize(
239+
"data",
240+
[
241+
(
242+
Timestamp("2011-01-15 12:50:28.502376"),
243+
Timestamp("2011-01-20 12:50:28.593448"),
244+
),
245+
(24650000000000001, 24650000000000002),
246+
],
247+
)
248+
@pytest.mark.parametrize("method", ["count", "min", "max", "first", "last"])
249+
def test_groupby_non_arithmetic_agg_int_like_precision(method, data):
250+
# GH#6620, GH#9311
251+
df = DataFrame({"a": [1, 1], "b": data})
252+
253+
grouped = df.groupby("a")
254+
result = getattr(grouped, method)()
255+
if method == "count":
256+
expected_value = 2
257+
elif method == "first":
258+
expected_value = data[0]
259+
elif method == "last":
260+
expected_value = data[1]
261+
else:
262+
expected_value = getattr(df["b"], method)()
263+
expected = DataFrame({"b": [expected_value]}, index=pd.Index([1], name="a"))
264+
265+
tm.assert_frame_equal(result, expected)
266+
267+
238268
def test_idxmin_idxmax_axis1():
239269
df = DataFrame(
240270
np.random.default_rng(2).standard_normal((10, 4)), columns=["A", "B", "C", "D"]

0 commit comments

Comments
 (0)