Skip to content

Commit d6c6788

Browse files
authored
TST: fixturize unfocused test (#38284)
1 parent 11d0176 commit d6c6788

File tree

1 file changed

+132
-114
lines changed

1 file changed

+132
-114
lines changed

pandas/tests/groupby/test_function.py

+132 -114
Original file line number | Diff line number | Diff line change
@@ -145,140 +145,158 @@ def test_builtins_apply(keys, f):
145145
tm.assert_series_equal(getattr(result, fname)(), getattr(df, fname)())
146146

147147

148-
def test_arg_passthru():
149-
# make sure that we are passing thru kwargs
150-
# to our agg functions
148+
class TestNumericOnly:
149+
# make sure that we are passing thru kwargs to our agg functions
151150

152-
# GH3668
153-
# GH5724
154-
df = DataFrame(
155-
{
156-
"group": [1, 1, 2],
157-
"int": [1, 2, 3],
158-
"float": [4.0, 5.0, 6.0],
159-
"string": list("abc"),
160-
"category_string": Series(list("abc")).astype("category"),
161-
"category_int": [7, 8, 9],
162-
"datetime": pd.date_range("20130101", periods=3),
163-
"datetimetz": pd.date_range("20130101", periods=3, tz="US/Eastern"),
164-
"timedelta": pd.timedelta_range("1 s", periods=3, freq="s"),
165-
},
166-
columns=[
167-
"group",
168-
"int",
169-
"float",
170-
"string",
171-
"category_string",
172-
"category_int",
173-
"datetime",
174-
"datetimetz",
175-
"timedelta",
176-
],
177-
)
151+
@pytest.fixture
152+
def df(self):
153+
# GH3668
154+
# GH5724
155+
df = DataFrame(
156+
{
157+
"group": [1, 1, 2],
158+
"int": [1, 2, 3],
159+
"float": [4.0, 5.0, 6.0],
160+
"string": list("abc"),
161+
"category_string": Series(list("abc")).astype("category"),
162+
"category_int": [7, 8, 9],
163+
"datetime": date_range("20130101", periods=3),
164+
"datetimetz": date_range("20130101", periods=3, tz="US/Eastern"),
165+
"timedelta": pd.timedelta_range("1 s", periods=3, freq="s"),
166+
},
167+
columns=[
168+
"group",
169+
"int",
170+
"float",
171+
"string",
172+
"category_string",
173+
"category_int",
174+
"datetime",
175+
"datetimetz",
176+
"timedelta",
177+
],
178+
)
179+
return df
178180

179-
expected_columns_numeric = Index(["int", "float", "category_int"])
181+
@pytest.mark.parametrize("method", ["mean", "median"])
182+
def test_averages(self, df, method):
183+
# mean / median
184+
expected_columns_numeric = Index(["int", "float", "category_int"])
180185

181-
# mean / median
182-
expected = DataFrame(
183-
{
184-
"category_int": [7.5, 9],
185-
"float": [4.5, 6.0],
186-
"timedelta": [pd.Timedelta("1.5s"), pd.Timedelta("3s")],
187-
"int": [1.5, 3],
188-
"datetime": [
189-
Timestamp("2013-01-01 12:00:00"),
190-
Timestamp("2013-01-03 00:00:00"),
191-
],
192-
"datetimetz": [
193-
Timestamp("2013-01-01 12:00:00", tz="US/Eastern"),
194-
Timestamp("2013-01-03 00:00:00", tz="US/Eastern"),
186+
gb = df.groupby("group")
187+
expected = DataFrame(
188+
{
189+
"category_int": [7.5, 9],
190+
"float": [4.5, 6.0],
191+
"timedelta": [pd.Timedelta("1.5s"), pd.Timedelta("3s")],
192+
"int": [1.5, 3],
193+
"datetime": [
194+
Timestamp("2013-01-01 12:00:00"),
195+
Timestamp("2013-01-03 00:00:00"),
196+
],
197+
"datetimetz": [
198+
Timestamp("2013-01-01 12:00:00", tz="US/Eastern"),
199+
Timestamp("2013-01-03 00:00:00", tz="US/Eastern"),
200+
],
201+
},
202+
index=Index([1, 2], name="group"),
203+
columns=[
204+
"int",
205+
"float",
206+
"category_int",
207+
"datetime",
208+
"datetimetz",
209+
"timedelta",
195210
],
196-
},
197-
index=Index([1, 2], name="group"),
198-
columns=["int", "float", "category_int", "datetime", "datetimetz", "timedelta"],
199-
)
200-
201-
for attr in ["mean", "median"]:
202-
result = getattr(df.groupby("group"), attr)()
203-
tm.assert_index_equal(result.columns, expected_columns_numeric)
211+
)
204212

205-
result = getattr(df.groupby("group"), attr)(numeric_only=False)
213+
result = getattr(gb, method)(numeric_only=False)
206214
tm.assert_frame_equal(result.reindex_like(expected), expected)
207215

208-
# TODO: min, max *should* handle
209-
# categorical (ordered) dtype
210-
expected_columns = Index(
211-
[
212-
"int",
213-
"float",
214-
"string",
215-
"category_int",
216-
"datetime",
217-
"datetimetz",
218-
"timedelta",
219-
]
220-
)
221-
for attr in ["min", "max"]:
222-
result = getattr(df.groupby("group"), attr)()
223-
tm.assert_index_equal(result.columns, expected_columns)
216+
expected_columns = expected.columns
224217

225-
result = getattr(df.groupby("group"), attr)(numeric_only=False)
226-
tm.assert_index_equal(result.columns, expected_columns)
218+
self._check(df, method, expected_columns, expected_columns_numeric)
227219

228-
expected_columns = Index(
229-
[
230-
"int",
231-
"float",
232-
"string",
233-
"category_string",
234-
"category_int",
235-
"datetime",
236-
"datetimetz",
237-
"timedelta",
238-
]
239-
)
240-
for attr in ["first", "last"]:
241-
result = getattr(df.groupby("group"), attr)()
242-
tm.assert_index_equal(result.columns, expected_columns)
220+
@pytest.mark.parametrize("method", ["min", "max"])
221+
def test_extrema(self, df, method):
222+
# TODO: min, max *should* handle
223+
# categorical (ordered) dtype
243224

244-
result = getattr(df.groupby("group"), attr)(numeric_only=False)
245-
tm.assert_index_equal(result.columns, expected_columns)
225+
expected_columns = Index(
226+
[
227+
"int",
228+
"float",
229+
"string",
230+
"category_int",
231+
"datetime",
232+
"datetimetz",
233+
"timedelta",
234+
]
235+
)
236+
expected_columns_numeric = expected_columns
246237

247-
expected_columns = Index(["int", "float", "string", "category_int", "timedelta"])
238+
self._check(df, method, expected_columns, expected_columns_numeric)
248239

249-
result = df.groupby("group").sum()
250-
tm.assert_index_equal(result.columns, expected_columns_numeric)
240+
@pytest.mark.parametrize("method", ["first", "last"])
241+
def test_first_last(self, df, method):
251242

252-
result = df.groupby("group").sum(numeric_only=False)
253-
tm.assert_index_equal(result.columns, expected_columns)
243+
expected_columns = Index(
244+
[
245+
"int",
246+
"float",
247+
"string",
248+
"category_string",
249+
"category_int",
250+
"datetime",
251+
"datetimetz",
252+
"timedelta",
253+
]
254+
)
255+
expected_columns_numeric = expected_columns
254256

255-
expected_columns = Index(["int", "float", "category_int"])
256-
for attr in ["prod", "cumprod"]:
257-
result = getattr(df.groupby("group"), attr)()
258-
tm.assert_index_equal(result.columns, expected_columns_numeric)
257+
self._check(df, method, expected_columns, expected_columns_numeric)
259258

260-
result = getattr(df.groupby("group"), attr)(numeric_only=False)
261-
tm.assert_index_equal(result.columns, expected_columns)
259+
@pytest.mark.parametrize("method", ["sum", "cumsum"])
260+
def test_sum_cumsum(self, df, method):
262261

263-
# like min, max, but don't include strings
264-
expected_columns = Index(
265-
["int", "float", "category_int", "datetime", "datetimetz", "timedelta"]
266-
)
267-
for attr in ["cummin", "cummax"]:
268-
result = getattr(df.groupby("group"), attr)()
269-
# GH 15561: numeric_only=False set by default like min/max
270-
tm.assert_index_equal(result.columns, expected_columns)
262+
expected_columns_numeric = Index(["int", "float", "category_int"])
263+
expected_columns = Index(
264+
["int", "float", "string", "category_int", "timedelta"]
265+
)
266+
if method == "cumsum":
267+
# cumsum loses string
268+
expected_columns = Index(["int", "float", "category_int", "timedelta"])
271269

272-
result = getattr(df.groupby("group"), attr)(numeric_only=False)
273-
tm.assert_index_equal(result.columns, expected_columns)
270+
self._check(df, method, expected_columns, expected_columns_numeric)
271+
272+
@pytest.mark.parametrize("method", ["prod", "cumprod"])
273+
def test_prod_cumprod(self, df, method):
274+
275+
expected_columns = Index(["int", "float", "category_int"])
276+
expected_columns_numeric = expected_columns
277+
278+
self._check(df, method, expected_columns, expected_columns_numeric)
274279

275-
expected_columns = Index(["int", "float", "category_int", "timedelta"])
280+
@pytest.mark.parametrize("method", ["cummin", "cummax"])
281+
def test_cummin_cummax(self, df, method):
282+
# like min, max, but don't include strings
283+
expected_columns = Index(
284+
["int", "float", "category_int", "datetime", "datetimetz", "timedelta"]
285+
)
286+
287+
# GH#15561: numeric_only=False set by default like min/max
288+
expected_columns_numeric = expected_columns
289+
290+
self._check(df, method, expected_columns, expected_columns_numeric)
276291

277-
result = getattr(df.groupby("group"), "cumsum")()
278-
tm.assert_index_equal(result.columns, expected_columns_numeric)
292+
def _check(self, df, method, expected_columns, expected_columns_numeric):
293+
gb = df.groupby("group")
279294

280-
result = getattr(df.groupby("group"), "cumsum")(numeric_only=False)
281-
tm.assert_index_equal(result.columns, expected_columns)
295+
result = getattr(gb, method)()
296+
tm.assert_index_equal(result.columns, expected_columns_numeric)
297+
298+
result = getattr(gb, method)(numeric_only=False)
299+
tm.assert_index_equal(result.columns, expected_columns)
282300

283301

284302
class TestGroupByNonCythonPaths:

0 commit comments

Comments
 (0)