Skip to content

Commit 3f28015

Browse files
authored
Remove unused imports (#41860)
1 parent 017ff7c commit 3f28015

File tree

4 files changed

+178
-167
lines changed

4 files changed

+178
-167
lines changed

pandas/tests/groupby/aggregate/test_aggregate.py

-52
Original file line numberDiff line numberDiff line change
@@ -976,34 +976,6 @@ def aggfunc(x):
976976
tm.assert_frame_equal(result, expected)
977977

978978

979-
@pytest.mark.parametrize("func", ["min", "max"])
980-
def test_groupby_aggregate_period_column(func):
981-
# GH 31471
982-
groups = [1, 2]
983-
periods = pd.period_range("2020", periods=2, freq="Y")
984-
df = DataFrame({"a": groups, "b": periods})
985-
986-
result = getattr(df.groupby("a")["b"], func)()
987-
idx = pd.Int64Index([1, 2], name="a")
988-
expected = Series(periods, index=idx, name="b")
989-
990-
tm.assert_series_equal(result, expected)
991-
992-
993-
@pytest.mark.parametrize("func", ["min", "max"])
994-
def test_groupby_aggregate_period_frame(func):
995-
# GH 31471
996-
groups = [1, 2]
997-
periods = pd.period_range("2020", periods=2, freq="Y")
998-
df = DataFrame({"a": groups, "b": periods})
999-
1000-
result = getattr(df.groupby("a"), func)()
1001-
idx = pd.Int64Index([1, 2], name="a")
1002-
expected = DataFrame({"b": periods}, index=idx)
1003-
1004-
tm.assert_frame_equal(result, expected)
1005-
1006-
1007979
class TestLambdaMangling:
1008980
def test_basic(self):
1009981
df = DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})
@@ -1267,30 +1239,6 @@ def test_aggregate_datetime_objects():
12671239
tm.assert_series_equal(result, expected)
12681240

12691241

1270-
def test_aggregate_numeric_object_dtype():
1271-
# https://github.com/pandas-dev/pandas/issues/39329
1272-
# simplified case: multiple object columns where one is all-NaN
1273-
# -> gets split as the all-NaN is inferred as float
1274-
df = DataFrame(
1275-
{"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": [np.nan] * 4},
1276-
).astype(object)
1277-
result = df.groupby("key").min()
1278-
expected = DataFrame(
1279-
{"key": ["A", "B"], "col1": ["a", "c"], "col2": [np.nan, np.nan]}
1280-
).set_index("key")
1281-
tm.assert_frame_equal(result, expected)
1282-
1283-
# same but with numbers
1284-
df = DataFrame(
1285-
{"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": range(4)},
1286-
).astype(object)
1287-
result = df.groupby("key").min()
1288-
expected = DataFrame(
1289-
{"key": ["A", "B"], "col1": ["a", "c"], "col2": [0, 2]}
1290-
).set_index("key")
1291-
tm.assert_frame_equal(result, expected)
1292-
1293-
12941242
def test_groupby_index_object_dtype():
12951243
# GH 40014
12961244
df = DataFrame({"c0": ["x", "x", "x"], "c1": ["x", "x", "y"], "p": [0, 1, 2]})

pandas/tests/groupby/test_categorical.py

-14
Original file line numberDiff line numberDiff line change
@@ -1593,20 +1593,6 @@ def test_agg_cython_category_not_implemented_fallback():
15931593
tm.assert_frame_equal(result, expected)
15941594

15951595

1596-
@pytest.mark.parametrize("func", ["min", "max"])
1597-
def test_aggregate_categorical_lost_index(func: str):
1598-
# GH: 28641 groupby drops index, when grouping over categorical column with min/max
1599-
ds = Series(["b"], dtype="category").cat.as_ordered()
1600-
df = DataFrame({"A": [1997], "B": ds})
1601-
result = df.groupby("A").agg({"B": func})
1602-
expected = DataFrame({"B": ["b"]}, index=Index([1997], name="A"))
1603-
1604-
# ordered categorical dtype should be preserved
1605-
expected["B"] = expected["B"].astype(ds.dtype)
1606-
1607-
tm.assert_frame_equal(result, expected)
1608-
1609-
16101596
def test_aggregate_categorical_with_isnan():
16111597
# GH 29837
16121598
df = DataFrame(

pandas/tests/groupby/test_function.py

-101
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas._libs.tslibs import iNaT
87
from pandas.errors import UnsupportedFunctionCall
98

109
import pandas as pd
@@ -52,74 +51,6 @@ def dtypes_for_minmax(request):
5251
return (dtype, min_val, max_val)
5352

5453

55-
def test_max_min_non_numeric():
56-
# #2700
57-
aa = DataFrame({"nn": [11, 11, 22, 22], "ii": [1, 2, 3, 4], "ss": 4 * ["mama"]})
58-
59-
result = aa.groupby("nn").max()
60-
assert "ss" in result
61-
62-
result = aa.groupby("nn").max(numeric_only=False)
63-
assert "ss" in result
64-
65-
result = aa.groupby("nn").min()
66-
assert "ss" in result
67-
68-
result = aa.groupby("nn").min(numeric_only=False)
69-
assert "ss" in result
70-
71-
72-
def test_max_min_object_multiple_columns(using_array_manager):
73-
# GH#41111 case where the aggregation is valid for some columns but not
74-
# others; we split object blocks column-wise, consistent with
75-
# DataFrame._reduce
76-
77-
df = DataFrame(
78-
{
79-
"A": [1, 1, 2, 2, 3],
80-
"B": [1, "foo", 2, "bar", False],
81-
"C": ["a", "b", "c", "d", "e"],
82-
}
83-
)
84-
df._consolidate_inplace() # should already be consolidate, but double-check
85-
if not using_array_manager:
86-
assert len(df._mgr.blocks) == 2
87-
88-
gb = df.groupby("A")
89-
90-
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
91-
result = gb.max(numeric_only=False)
92-
# "max" is valid for column "C" but not for "B"
93-
ei = Index([1, 2, 3], name="A")
94-
expected = DataFrame({"C": ["b", "d", "e"]}, index=ei)
95-
tm.assert_frame_equal(result, expected)
96-
97-
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
98-
result = gb.min(numeric_only=False)
99-
# "min" is valid for column "C" but not for "B"
100-
ei = Index([1, 2, 3], name="A")
101-
expected = DataFrame({"C": ["a", "c", "e"]}, index=ei)
102-
tm.assert_frame_equal(result, expected)
103-
104-
105-
def test_min_date_with_nans():
106-
# GH26321
107-
dates = pd.to_datetime(
108-
Series(["2019-05-09", "2019-05-09", "2019-05-09"]), format="%Y-%m-%d"
109-
).dt.date
110-
df = DataFrame({"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": dates})
111-
112-
result = df.groupby("b", as_index=False)["c"].min()["c"]
113-
expected = pd.to_datetime(
114-
Series(["2019-05-09", "2019-05-09"], name="c"), format="%Y-%m-%d"
115-
).dt.date
116-
tm.assert_series_equal(result, expected)
117-
118-
result = df.groupby("b")["c"].min()
119-
expected.index.name = "b"
120-
tm.assert_series_equal(result, expected)
121-
122-
12354
def test_intercept_builtin_sum():
12455
s = Series([1.0, 2.0, np.nan, 3.0])
12556
grouped = s.groupby([0, 1, 2, 2])
@@ -664,38 +595,6 @@ def test_max_nan_bug():
664595
assert not r["File"].isna().any()
665596

666597

667-
def test_max_inat():
668-
# GH#40767 don't interpret iNaT as NaN
669-
ser = Series([1, iNaT])
670-
gb = ser.groupby([1, 1])
671-
672-
result = gb.max(min_count=2)
673-
expected = Series({1: 1}, dtype=np.int64)
674-
tm.assert_series_equal(result, expected, check_exact=True)
675-
676-
result = gb.min(min_count=2)
677-
expected = Series({1: iNaT}, dtype=np.int64)
678-
tm.assert_series_equal(result, expected, check_exact=True)
679-
680-
# not enough entries -> gets masked to NaN
681-
result = gb.min(min_count=3)
682-
expected = Series({1: np.nan})
683-
tm.assert_series_equal(result, expected, check_exact=True)
684-
685-
686-
def test_max_inat_not_all_na():
687-
# GH#40767 don't interpret iNaT as NaN
688-
689-
# make sure we don't round iNaT+1 to iNaT
690-
ser = Series([1, iNaT, 2, iNaT + 1])
691-
gb = ser.groupby([1, 2, 3, 3])
692-
result = gb.min(min_count=2)
693-
694-
# Note: in converting to float64, the iNaT + 1 maps to iNaT, i.e. is lossy
695-
expected = Series({1: np.nan, 2: np.nan, 3: iNaT + 1})
696-
tm.assert_series_equal(result, expected, check_exact=True)
697-
698-
699598
def test_nlargest():
700599
a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10])
701600
b = Series(list("a" * 5 + "b" * 5))

pandas/tests/groupby/test_min_max.py

+178
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas._libs.tslibs import iNaT
5+
6+
import pandas as pd
7+
from pandas import (
8+
DataFrame,
9+
Index,
10+
Series,
11+
)
12+
import pandas._testing as tm
13+
14+
15+
def test_max_min_non_numeric():
16+
# #2700
17+
aa = DataFrame({"nn": [11, 11, 22, 22], "ii": [1, 2, 3, 4], "ss": 4 * ["mama"]})
18+
19+
result = aa.groupby("nn").max()
20+
assert "ss" in result
21+
22+
result = aa.groupby("nn").max(numeric_only=False)
23+
assert "ss" in result
24+
25+
result = aa.groupby("nn").min()
26+
assert "ss" in result
27+
28+
result = aa.groupby("nn").min(numeric_only=False)
29+
assert "ss" in result
30+
31+
32+
def test_max_min_object_multiple_columns(using_array_manager):
33+
# GH#41111 case where the aggregation is valid for some columns but not
34+
# others; we split object blocks column-wise, consistent with
35+
# DataFrame._reduce
36+
37+
df = DataFrame(
38+
{
39+
"A": [1, 1, 2, 2, 3],
40+
"B": [1, "foo", 2, "bar", False],
41+
"C": ["a", "b", "c", "d", "e"],
42+
}
43+
)
44+
df._consolidate_inplace() # should already be consolidate, but double-check
45+
if not using_array_manager:
46+
assert len(df._mgr.blocks) == 2
47+
48+
gb = df.groupby("A")
49+
50+
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
51+
result = gb.max(numeric_only=False)
52+
# "max" is valid for column "C" but not for "B"
53+
ei = Index([1, 2, 3], name="A")
54+
expected = DataFrame({"C": ["b", "d", "e"]}, index=ei)
55+
tm.assert_frame_equal(result, expected)
56+
57+
with tm.assert_produces_warning(FutureWarning, match="Dropping invalid"):
58+
result = gb.min(numeric_only=False)
59+
# "min" is valid for column "C" but not for "B"
60+
ei = Index([1, 2, 3], name="A")
61+
expected = DataFrame({"C": ["a", "c", "e"]}, index=ei)
62+
tm.assert_frame_equal(result, expected)
63+
64+
65+
def test_min_date_with_nans():
66+
# GH26321
67+
dates = pd.to_datetime(
68+
Series(["2019-05-09", "2019-05-09", "2019-05-09"]), format="%Y-%m-%d"
69+
).dt.date
70+
df = DataFrame({"a": [np.nan, "1", np.nan], "b": [0, 1, 1], "c": dates})
71+
72+
result = df.groupby("b", as_index=False)["c"].min()["c"]
73+
expected = pd.to_datetime(
74+
Series(["2019-05-09", "2019-05-09"], name="c"), format="%Y-%m-%d"
75+
).dt.date
76+
tm.assert_series_equal(result, expected)
77+
78+
result = df.groupby("b")["c"].min()
79+
expected.index.name = "b"
80+
tm.assert_series_equal(result, expected)
81+
82+
83+
def test_max_inat():
84+
# GH#40767 don't interpret iNaT as NaN
85+
ser = Series([1, iNaT])
86+
gb = ser.groupby([1, 1])
87+
88+
result = gb.max(min_count=2)
89+
expected = Series({1: 1}, dtype=np.int64)
90+
tm.assert_series_equal(result, expected, check_exact=True)
91+
92+
result = gb.min(min_count=2)
93+
expected = Series({1: iNaT}, dtype=np.int64)
94+
tm.assert_series_equal(result, expected, check_exact=True)
95+
96+
# not enough entries -> gets masked to NaN
97+
result = gb.min(min_count=3)
98+
expected = Series({1: np.nan})
99+
tm.assert_series_equal(result, expected, check_exact=True)
100+
101+
102+
def test_max_inat_not_all_na():
103+
# GH#40767 don't interpret iNaT as NaN
104+
105+
# make sure we don't round iNaT+1 to iNaT
106+
ser = Series([1, iNaT, 2, iNaT + 1])
107+
gb = ser.groupby([1, 2, 3, 3])
108+
result = gb.min(min_count=2)
109+
110+
# Note: in converting to float64, the iNaT + 1 maps to iNaT, i.e. is lossy
111+
expected = Series({1: np.nan, 2: np.nan, 3: iNaT + 1})
112+
tm.assert_series_equal(result, expected, check_exact=True)
113+
114+
115+
@pytest.mark.parametrize("func", ["min", "max"])
116+
def test_groupby_aggregate_period_column(func):
117+
# GH 31471
118+
groups = [1, 2]
119+
periods = pd.period_range("2020", periods=2, freq="Y")
120+
df = DataFrame({"a": groups, "b": periods})
121+
122+
result = getattr(df.groupby("a")["b"], func)()
123+
idx = pd.Int64Index([1, 2], name="a")
124+
expected = Series(periods, index=idx, name="b")
125+
126+
tm.assert_series_equal(result, expected)
127+
128+
129+
@pytest.mark.parametrize("func", ["min", "max"])
130+
def test_groupby_aggregate_period_frame(func):
131+
# GH 31471
132+
groups = [1, 2]
133+
periods = pd.period_range("2020", periods=2, freq="Y")
134+
df = DataFrame({"a": groups, "b": periods})
135+
136+
result = getattr(df.groupby("a"), func)()
137+
idx = pd.Int64Index([1, 2], name="a")
138+
expected = DataFrame({"b": periods}, index=idx)
139+
140+
tm.assert_frame_equal(result, expected)
141+
142+
143+
def test_aggregate_numeric_object_dtype():
144+
# https://github.com/pandas-dev/pandas/issues/39329
145+
# simplified case: multiple object columns where one is all-NaN
146+
# -> gets split as the all-NaN is inferred as float
147+
df = DataFrame(
148+
{"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": [np.nan] * 4},
149+
).astype(object)
150+
result = df.groupby("key").min()
151+
expected = DataFrame(
152+
{"key": ["A", "B"], "col1": ["a", "c"], "col2": [np.nan, np.nan]}
153+
).set_index("key")
154+
tm.assert_frame_equal(result, expected)
155+
156+
# same but with numbers
157+
df = DataFrame(
158+
{"key": ["A", "A", "B", "B"], "col1": list("abcd"), "col2": range(4)},
159+
).astype(object)
160+
result = df.groupby("key").min()
161+
expected = DataFrame(
162+
{"key": ["A", "B"], "col1": ["a", "c"], "col2": [0, 2]}
163+
).set_index("key")
164+
tm.assert_frame_equal(result, expected)
165+
166+
167+
@pytest.mark.parametrize("func", ["min", "max"])
168+
def test_aggregate_categorical_lost_index(func: str):
169+
# GH: 28641 groupby drops index, when grouping over categorical column with min/max
170+
ds = Series(["b"], dtype="category").cat.as_ordered()
171+
df = DataFrame({"A": [1997], "B": ds})
172+
result = df.groupby("A").agg({"B": func})
173+
expected = DataFrame({"B": ["b"]}, index=Index([1997], name="A"))
174+
175+
# ordered categorical dtype should be preserved
176+
expected["B"] = expected["B"].astype(ds.dtype)
177+
178+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)