Skip to content

Commit 1c1b158

Browse files
authored
BUG, DEPR, PERF: GroupBy aggregations (#52128)
* BUG, DEPR, PERF: GroupBy aggregations * GH ref
1 parent aff52bf commit 1c1b158

File tree

4 files changed

+95
-37
lines changed

4 files changed

+95
-37
lines changed

doc/source/whatsnew/v2.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ Deprecations
100100
~~~~~~~~~~~~
101101
- Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
102102
- Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`)
103+
- Deprecated :meth:`.GroupBy.all` and :meth:`.GroupBy.any` with datetime64 or :class:`PeriodDtype` values, matching the :class:`Series` and :class:`DataFrame` deprecations (:issue:`34479`)
103104
- Deprecated pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
104105
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`)
105106
- Deprecated :meth:`DataFrameGroupBy.dtypes`, check ``dtypes`` on the underlying object instead (:issue:`51045`)
@@ -228,6 +229,7 @@ Groupby/resample/rolling
228229
grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex`
229230
or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument,
230231
the function operated on the whole index rather than each element of the index. (:issue:`51979`)
232+
- Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`)
231233
-
232234

233235
Reshaping

pandas/core/groupby/groupby.py

+3-9
Original file line numberDiff line numberDiff line change
@@ -1500,13 +1500,7 @@ def _agg_py_fallback(
15001500
# TODO: Is this exactly right; see WrappedCythonOp get_result_dtype?
15011501
res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True)
15021502

1503-
if isinstance(values, Categorical):
1504-
# Because we only get here with known dtype-preserving
1505-
# reductions, we cast back to Categorical.
1506-
# TODO: if we ever get "rank" working, exclude it here.
1507-
res_values = type(values)._from_sequence(res_values, dtype=values.dtype)
1508-
1509-
elif ser.dtype == object:
1503+
if ser.dtype == object:
15101504
res_values = res_values.astype(object, copy=False)
15111505

15121506
# If we are DataFrameGroupBy and went through a SeriesGroupByPath
@@ -1544,8 +1538,8 @@ def array_func(values: ArrayLike) -> ArrayLike:
15441538
# and non-applicable functions
15451539
# try to python agg
15461540
# TODO: shouldn't min_count matter?
1547-
if how in ["any", "all"]:
1548-
raise # TODO: re-raise as TypeError?
1541+
if how in ["any", "all", "std", "sem"]:
1542+
raise # TODO: re-raise as TypeError? should not be reached
15491543
result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
15501544

15511545
return result

pandas/core/groupby/ops.py

+25-4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
Sequence,
1919
final,
2020
)
21+
import warnings
2122

2223
import numpy as np
2324

@@ -37,6 +38,7 @@
3738
)
3839
from pandas.errors import AbstractMethodError
3940
from pandas.util._decorators import cache_readonly
41+
from pandas.util._exceptions import find_stack_level
4042

4143
from pandas.core.dtypes.cast import (
4244
maybe_cast_pointwise_result,
@@ -258,19 +260,38 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
258260
# case that would fail to raise
259261
raise TypeError(f"Cannot perform {how} with non-ordered Categorical")
260262
if how not in ["rank", "any", "all", "first", "last", "min", "max"]:
261-
# only "rank" is implemented in cython
262-
raise NotImplementedError(f"{dtype} dtype not supported")
263+
if self.kind == "transform":
264+
raise TypeError(f"{dtype} type does not support {how} operations")
265+
raise TypeError(f"{dtype} dtype does not support aggregation '{how}'")
263266

264267
elif is_sparse(dtype):
265268
raise NotImplementedError(f"{dtype} dtype not supported")
266269
elif is_datetime64_any_dtype(dtype):
267270
# Adding/multiplying datetimes is not valid
268-
if how in ["sum", "prod", "cumsum", "cumprod"]:
271+
if how in ["sum", "prod", "cumsum", "cumprod", "var"]:
269272
raise TypeError(f"datetime64 type does not support {how} operations")
273+
if how in ["any", "all"]:
274+
# GH#34479
275+
warnings.warn(
276+
f"'{how}' with datetime64 dtypes is deprecated and will raise in a "
277+
f"future version. Use (obj != pd.Timestamp(0)).{how}() instead.",
278+
FutureWarning,
279+
stacklevel=find_stack_level(),
280+
)
281+
270282
elif is_period_dtype(dtype):
271283
# Adding/multiplying Periods is not valid
272-
if how in ["sum", "prod", "cumsum", "cumprod"]:
284+
if how in ["sum", "prod", "cumsum", "cumprod", "var"]:
273285
raise TypeError(f"Period type does not support {how} operations")
286+
if how in ["any", "all"]:
287+
# GH#34479
288+
warnings.warn(
289+
f"'{how}' with PeriodDtype is deprecated and will raise in a "
290+
f"future version. Use (obj != pd.Period(0, freq)).{how}() instead.",
291+
FutureWarning,
292+
stacklevel=find_stack_level(),
293+
)
294+
274295
elif is_timedelta64_dtype(dtype):
275296
# timedeltas we can add but not multiply
276297
if how in ["prod", "cumprod"]:

pandas/tests/groupby/test_raises.py

+65-24
Original file line numberDiff line numberDiff line change
@@ -231,16 +231,23 @@ def test_groupby_raises_datetime(how, by, groupby_series, groupby_func):
231231
"skew": (TypeError, r"dtype datetime64\[ns\] does not support reduction"),
232232
"std": (None, ""),
233233
"sum": (TypeError, "datetime64 type does not support sum operations"),
234-
"var": (None, ""),
234+
"var": (TypeError, "datetime64 type does not support var operations"),
235235
}[groupby_func]
236236

237237
if klass is None:
238-
if how == "method":
239-
getattr(gb, groupby_func)(*args)
240-
elif how == "agg":
241-
gb.agg(groupby_func, *args)
242-
else:
243-
gb.transform(groupby_func, *args)
238+
warn = None
239+
warn_msg = f"'{groupby_func}' with datetime64 dtypes is deprecated"
240+
if groupby_func in ["any", "all"]:
241+
warn = FutureWarning
242+
243+
with tm.assert_produces_warning(warn, match=warn_msg):
244+
if how == "method":
245+
getattr(gb, groupby_func)(*args)
246+
elif how == "agg":
247+
gb.agg(groupby_func, *args)
248+
else:
249+
gb.transform(groupby_func, *args)
250+
244251
else:
245252
with pytest.raises(klass, match=msg):
246253
if how == "method":
@@ -383,11 +390,21 @@ def test_groupby_raises_category(
383390
"max": (None, ""),
384391
"mean": (
385392
TypeError,
386-
"'Categorical' with dtype category does not support reduction 'mean'",
393+
"|".join(
394+
[
395+
"'Categorical' .* does not support reduction 'mean'",
396+
"category dtype does not support aggregation 'mean'",
397+
]
398+
),
387399
),
388400
"median": (
389401
TypeError,
390-
"'Categorical' with dtype category does not support reduction 'median'",
402+
"|".join(
403+
[
404+
"'Categorical' .* does not support reduction 'median'",
405+
"category dtype does not support aggregation 'median'",
406+
]
407+
),
391408
),
392409
"min": (None, ""),
393410
"ngroup": (None, ""),
@@ -401,7 +418,12 @@ def test_groupby_raises_category(
401418
"rank": (None, ""),
402419
"sem": (
403420
TypeError,
404-
"'Categorical' with dtype category does not support reduction 'sem'",
421+
"|".join(
422+
[
423+
"'Categorical' .* does not support reduction 'sem'",
424+
"category dtype does not support aggregation 'sem'",
425+
]
426+
),
405427
),
406428
"shift": (None, ""),
407429
"size": (None, ""),
@@ -411,12 +433,22 @@ def test_groupby_raises_category(
411433
),
412434
"std": (
413435
TypeError,
414-
"'Categorical' with dtype category does not support reduction 'std'",
436+
"|".join(
437+
[
438+
"'Categorical' .* does not support reduction 'std'",
439+
"category dtype does not support aggregation 'std'",
440+
]
441+
),
415442
),
416443
"sum": (TypeError, "category type does not support sum operations"),
417444
"var": (
418445
TypeError,
419-
"'Categorical' with dtype category does not support reduction 'var'",
446+
"|".join(
447+
[
448+
"'Categorical' .* does not support reduction 'var'",
449+
"category dtype does not support aggregation 'var'",
450+
]
451+
),
420452
),
421453
}[groupby_func]
422454

@@ -489,7 +521,7 @@ def test_groupby_raises_category_np(how, by, groupby_series, groupby_func_np):
489521
np.sum: (TypeError, "category type does not support sum operations"),
490522
np.mean: (
491523
TypeError,
492-
"'Categorical' with dtype category does not support reduction 'mean'",
524+
"category dtype does not support aggregation 'mean'",
493525
),
494526
}[groupby_func_np]
495527

@@ -585,14 +617,8 @@ def test_groupby_raises_category_on_category(
585617
else (None, ""),
586618
"last": (None, ""),
587619
"max": (None, ""),
588-
"mean": (
589-
TypeError,
590-
"'Categorical' with dtype category does not support reduction 'mean'",
591-
),
592-
"median": (
593-
TypeError,
594-
"'Categorical' with dtype category does not support reduction 'median'",
595-
),
620+
"mean": (TypeError, "category dtype does not support aggregation 'mean'"),
621+
"median": (TypeError, "category dtype does not support aggregation 'median'"),
596622
"min": (None, ""),
597623
"ngroup": (None, ""),
598624
"nunique": (None, ""),
@@ -602,7 +628,12 @@ def test_groupby_raises_category_on_category(
602628
"rank": (None, ""),
603629
"sem": (
604630
TypeError,
605-
"'Categorical' with dtype category does not support reduction 'sem'",
631+
"|".join(
632+
[
633+
"'Categorical' .* does not support reduction 'sem'",
634+
"category dtype does not support aggregation 'sem'",
635+
]
636+
),
606637
),
607638
"shift": (None, ""),
608639
"size": (None, ""),
@@ -612,12 +643,22 @@ def test_groupby_raises_category_on_category(
612643
),
613644
"std": (
614645
TypeError,
615-
"'Categorical' with dtype category does not support reduction 'std'",
646+
"|".join(
647+
[
648+
"'Categorical' .* does not support reduction 'std'",
649+
"category dtype does not support aggregation 'std'",
650+
]
651+
),
616652
),
617653
"sum": (TypeError, "category type does not support sum operations"),
618654
"var": (
619655
TypeError,
620-
"'Categorical' with dtype category does not support reduction 'var'",
656+
"|".join(
657+
[
658+
"'Categorical' .* does not support reduction 'var'",
659+
"category dtype does not support aggregation 'var'",
660+
]
661+
),
621662
),
622663
}[groupby_func]
623664

0 commit comments

Comments
 (0)