diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a381a7bcb33f5..4a721ae0d4bf6 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -46,7 +46,6 @@ from pandas.core.dtypes.cast import ( find_common_type, - maybe_cast_result_dtype, maybe_downcast_numeric, ) from pandas.core.dtypes.common import ( @@ -58,7 +57,6 @@ is_interval_dtype, is_numeric_dtype, is_scalar, - needs_i8_conversion, ) from pandas.core.dtypes.missing import ( isna, @@ -1104,13 +1102,11 @@ def _cython_agg_manager( using_array_manager = isinstance(data, ArrayManager) - def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike: + def cast_agg_result( + result: ArrayLike, values: ArrayLike, how: str + ) -> ArrayLike: # see if we can cast the values to the desired dtype # this may not be the original dtype - assert not isinstance(result, DataFrame) - - dtype = maybe_cast_result_dtype(values.dtype, how) - result = maybe_downcast_numeric(result, dtype) if isinstance(values, Categorical) and isinstance(result, np.ndarray): # If the Categorical op didn't raise, it is dtype-preserving @@ -1125,6 +1121,7 @@ def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike: ): # We went through a SeriesGroupByPath and need to reshape # GH#32223 includes case with IntegerArray values + # We only get here with values.dtype == object result = result.reshape(1, -1) # test_groupby_duplicate_columns gets here with # result.dtype == int64, values.dtype=object, how="min" @@ -1140,8 +1137,11 @@ def py_fallback(values: ArrayLike) -> ArrayLike: # call our grouper again with only this block if values.ndim == 1: + # We only get here with ExtensionArray + obj = Series(values) else: + # We only get here with values.dtype == object # TODO special case not needed with ArrayManager obj = DataFrame(values.T) if obj.shape[1] == 1: @@ -1193,7 +1193,8 @@ def array_func(values: ArrayLike) -> ArrayLike: result = py_fallback(values) - return cast_agg_result(result, values, how) + return cast_agg_result(result, values, how) + return result # TypeError -> we may have an exception in trying to aggregate # continue and exclude the block @@ -1366,11 +1367,7 @@ def _wrap_applied_output_series( # if we have date/time like in the original, then coerce dates # as we are stacking can easily have object dtypes here - so = self._selected_obj - if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any(): - result = result._convert(datetime=True) - else: - result = result._convert(datetime=True) + result = result._convert(datetime=True) if not self.as_index: self._insert_inaxis_grouper_inplace(result) @@ -1507,7 +1504,7 @@ def _choose_path(self, fast_path: Callable, slow_path: Callable, group: DataFram try: res_fast = fast_path(group) except AssertionError: - raise + raise # pragma: no cover except Exception: # GH#29631 For user-defined function, we can't predict what may be # raised; see test_transform.test_transform_fastpath_raises diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 43db889618db6..f2fffe4c3741c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1033,7 +1033,7 @@ def _cython_transform( result = self.grouper._cython_operation( "transform", obj._values, how, axis, **kwargs ) - except NotImplementedError: + except (NotImplementedError, TypeError): continue key = base.OutputKey(label=name, position=idx) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 26bc9094fdef5..d9bf1adf74a5e 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -136,23 +136,17 @@ def _get_cython_function( # see if there is a fused-type version of function # only valid for numeric - f = getattr(libgroupby, ftype, None) - if f is not None: - if is_numeric: - return f - elif dtype == object: - if "object" not in f.__signatures__: - # raise NotImplementedError here rather than TypeError later - raise NotImplementedError( - f"function is not implemented for this dtype: " - f"[how->{how},dtype->{dtype_str}]" - ) - return f - - raise NotImplementedError( - f"function is not implemented for this dtype: " - f"[how->{how},dtype->{dtype_str}]" - ) + f = getattr(libgroupby, ftype) + if is_numeric: + return f + elif dtype == object: + if "object" not in f.__signatures__: + # raise NotImplementedError here rather than TypeError later + raise NotImplementedError( + f"function is not implemented for this dtype: " + f"[how->{how},dtype->{dtype_str}]" + ) + return f def get_cython_func_and_vals(self, values: np.ndarray, is_numeric: bool): """ @@ -208,7 +202,14 @@ def disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False): # never an invalid op for those dtypes, so return early as fastpath return - if is_categorical_dtype(dtype) or is_sparse(dtype): + if is_categorical_dtype(dtype): + # NotImplementedError for methods that can fall back to a + # non-cython implementation. + if how in ["add", "prod", "cumsum", "cumprod"]: + raise TypeError(f"{dtype} type does not support {how} operations") + raise NotImplementedError(f"{dtype} dtype not supported") + + elif is_sparse(dtype): # categoricals are only 1d, so we # are not setup for dim transforming raise NotImplementedError(f"{dtype} dtype not supported") @@ -216,14 +217,10 @@ def disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False): # we raise NotImplemented if this is an invalid operation # entirely, e.g. adding datetimes if how in ["add", "prod", "cumsum", "cumprod"]: - raise NotImplementedError( - f"datetime64 type does not support {how} operations" - ) + raise TypeError(f"datetime64 type does not support {how} operations") elif is_timedelta64_dtype(dtype): if how in ["prod", "cumprod"]: - raise NotImplementedError( - f"timedelta64 type does not support {how} operations" - ) + raise TypeError(f"timedelta64 type does not support {how} operations") def get_output_shape(self, ngroups: int, values: np.ndarray) -> Shape: how = self.how