diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 706dcaa34366a..a300736f948c1 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -100,6 +100,7 @@ Deprecations ~~~~~~~~~~~~ - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`) - Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`) +- Deprecated :meth:`.GroupBy.all` and :meth:`.GroupBy.any` with datetime64 or :class:`PeriodDtype` values, matching the :class:`Series` and :class:`DataFrame` deprecations (:issue:`34479`) - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`) - Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`) - Deprecated :meth:`DataFrameGroupBy.dtypes`, check ``dtypes`` on the underlying object instead (:issue:`51045`) @@ -226,6 +227,7 @@ Groupby/resample/rolling grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex` or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument, the function operated on the whole index rather than each element of the index. (:issue:`51979`) +- Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`) - Reshaping diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 12ecf5d54b356..e84a23be6c5bb 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1500,13 +1500,7 @@ def _agg_py_fallback( # TODO: Is this exactly right; see WrappedCythonOp get_result_dtype? 
res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True) - if isinstance(values, Categorical): - # Because we only get here with known dtype-preserving - # reductions, we cast back to Categorical. - # TODO: if we ever get "rank" working, exclude it here. - res_values = type(values)._from_sequence(res_values, dtype=values.dtype) - - elif ser.dtype == object: + if ser.dtype == object: res_values = res_values.astype(object, copy=False) # If we are DataFrameGroupBy and went through a SeriesGroupByPath @@ -1544,8 +1538,8 @@ def array_func(values: ArrayLike) -> ArrayLike: # and non-applicable functions # try to python agg # TODO: shouldn't min_count matter? - if how in ["any", "all"]: - raise # TODO: re-raise as TypeError? + if how in ["any", "all", "std", "sem"]: + raise # TODO: re-raise as TypeError? should not be reached result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt) return result diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 4f21d90ac5116..dad188e2d9304 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -18,6 +18,7 @@ Sequence, final, ) +import warnings import numpy as np @@ -37,6 +38,7 @@ ) from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ( maybe_cast_pointwise_result, @@ -258,19 +260,38 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False): # case that would fail to raise raise TypeError(f"Cannot perform {how} with non-ordered Categorical") if how not in ["rank", "any", "all", "first", "last", "min", "max"]: - # only "rank" is implemented in cython - raise NotImplementedError(f"{dtype} dtype not supported") + if self.kind == "transform": + raise TypeError(f"{dtype} type does not support {how} operations") + raise TypeError(f"{dtype} dtype does not support aggregation '{how}'") elif is_sparse(dtype): raise 
NotImplementedError(f"{dtype} dtype not supported") elif is_datetime64_any_dtype(dtype): # Adding/multiplying datetimes is not valid - if how in ["sum", "prod", "cumsum", "cumprod"]: + if how in ["sum", "prod", "cumsum", "cumprod", "var"]: raise TypeError(f"datetime64 type does not support {how} operations") + if how in ["any", "all"]: + # GH#34479 + warnings.warn( + f"'{how}' with datetime64 dtypes is deprecated and will raise in a " + f"future version. Use (obj != pd.Timestamp(0)).{how}() instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif is_period_dtype(dtype): # Adding/multiplying Periods is not valid - if how in ["sum", "prod", "cumsum", "cumprod"]: + if how in ["sum", "prod", "cumsum", "cumprod", "var"]: raise TypeError(f"Period type does not support {how} operations") + if how in ["any", "all"]: + # GH#34479 + warnings.warn( + f"'{how}' with PeriodDtype is deprecated and will raise in a " + f"future version. Use (obj != pd.Period(0, freq)).{how}() instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif is_timedelta64_dtype(dtype): # timedeltas we can add but not multiply if how in ["prod", "cumprod"]: diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 60d4f98aa33f7..bd3686354e432 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -231,16 +231,23 @@ def test_groupby_raises_datetime(how, by, groupby_series, groupby_func): "skew": (TypeError, r"dtype datetime64\[ns\] does not support reduction"), "std": (None, ""), "sum": (TypeError, "datetime64 type does not support sum operations"), - "var": (None, ""), + "var": (TypeError, "datetime64 type does not support var operations"), }[groupby_func] if klass is None: - if how == "method": - getattr(gb, groupby_func)(*args) - elif how == "agg": - gb.agg(groupby_func, *args) - else: - gb.transform(groupby_func, *args) + warn = None + warn_msg = f"'{groupby_func}' with datetime64 dtypes is deprecated" 
+ if groupby_func in ["any", "all"]: + warn = FutureWarning + + with tm.assert_produces_warning(warn, match=warn_msg): + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + else: with pytest.raises(klass, match=msg): if how == "method": @@ -383,11 +390,21 @@ def test_groupby_raises_category( "max": (None, ""), "mean": ( TypeError, - "'Categorical' with dtype category does not support reduction 'mean'", + "|".join( + [ + "'Categorical' .* does not support reduction 'mean'", + "category dtype does not support aggregation 'mean'", + ] + ), ), "median": ( TypeError, - "'Categorical' with dtype category does not support reduction 'median'", + "|".join( + [ + "'Categorical' .* does not support reduction 'median'", + "category dtype does not support aggregation 'median'", + ] + ), ), "min": (None, ""), "ngroup": (None, ""), @@ -401,7 +418,12 @@ def test_groupby_raises_category( "rank": (None, ""), "sem": ( TypeError, - "'Categorical' with dtype category does not support reduction 'sem'", + "|".join( + [ + "'Categorical' .* does not support reduction 'sem'", + "category dtype does not support aggregation 'sem'", + ] + ), ), "shift": (None, ""), "size": (None, ""), @@ -411,12 +433,22 @@ def test_groupby_raises_category( ), "std": ( TypeError, - "'Categorical' with dtype category does not support reduction 'std'", + "|".join( + [ + "'Categorical' .* does not support reduction 'std'", + "category dtype does not support aggregation 'std'", + ] + ), ), "sum": (TypeError, "category type does not support sum operations"), "var": ( TypeError, - "'Categorical' with dtype category does not support reduction 'var'", + "|".join( + [ + "'Categorical' .* does not support reduction 'var'", + "category dtype does not support aggregation 'var'", + ] + ), ), }[groupby_func] @@ -489,7 +521,7 @@ def test_groupby_raises_category_np(how, by, groupby_series, groupby_func_np): np.sum: (TypeError, 
"category type does not support sum operations"), np.mean: ( TypeError, - "'Categorical' with dtype category does not support reduction 'mean'", + "category dtype does not support aggregation 'mean'", ), }[groupby_func_np] @@ -585,14 +617,8 @@ def test_groupby_raises_category_on_category( else (None, ""), "last": (None, ""), "max": (None, ""), - "mean": ( - TypeError, - "'Categorical' with dtype category does not support reduction 'mean'", - ), - "median": ( - TypeError, - "'Categorical' with dtype category does not support reduction 'median'", - ), + "mean": (TypeError, "category dtype does not support aggregation 'mean'"), + "median": (TypeError, "category dtype does not support aggregation 'median'"), "min": (None, ""), "ngroup": (None, ""), "nunique": (None, ""), @@ -602,7 +628,12 @@ def test_groupby_raises_category_on_category( "rank": (None, ""), "sem": ( TypeError, - "'Categorical' with dtype category does not support reduction 'sem'", + "|".join( + [ + "'Categorical' .* does not support reduction 'sem'", + "category dtype does not support aggregation 'sem'", + ] + ), ), "shift": (None, ""), "size": (None, ""), @@ -612,12 +643,22 @@ def test_groupby_raises_category_on_category( ), "std": ( TypeError, - "'Categorical' with dtype category does not support reduction 'std'", + "|".join( + [ + "'Categorical' .* does not support reduction 'std'", + "category dtype does not support aggregation 'std'", + ] + ), ), "sum": (TypeError, "category type does not support sum operations"), "var": ( TypeError, - "'Categorical' with dtype category does not support reduction 'var'", + "|".join( + [ + "'Categorical' .* does not support reduction 'var'", + "category dtype does not support aggregation 'var'", + ] + ), ), }[groupby_func]