diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 63c414d96c8de..c1b5897164d76 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -1,4 +1,4 @@ -from typing import Any, Sequence, TypeVar +from typing import Any, Optional, Sequence, TypeVar import numpy as np @@ -254,6 +254,11 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs): msg = f"'{type(self).__name__}' does not implement reduction '{name}'" raise TypeError(msg) + def _wrap_reduction_result(self, axis: Optional[int], result): + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + # ------------------------------------------------------------------------ def __repr__(self) -> str: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index edbf24ca87f5c..57d934a633911 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1957,7 +1957,7 @@ def min(self, *, skipna=True, **kwargs): return np.nan else: pointer = self._codes.min() - return self.categories[pointer] + return self._wrap_reduction_result(None, pointer) @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") def max(self, *, skipna=True, **kwargs): @@ -1993,7 +1993,7 @@ def max(self, *, skipna=True, **kwargs): return np.nan else: pointer = self._codes.max() - return self.categories[pointer] + return self._wrap_reduction_result(None, pointer) def mode(self, dropna=True): """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7a0d88f29b9b0..8d90035491d28 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1283,9 +1283,7 @@ def min(self, *, axis=None, skipna=True, **kwargs): return self._from_backing_data(result) result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) - if lib.is_scalar(result): - return self._box_func(result) - return self._from_backing_data(result) + return self._wrap_reduction_result(axis, result) def max(self, *, axis=None, skipna=True, **kwargs): """ @@ -1316,9 +1314,7 @@ def max(self, *, axis=None, skipna=True, **kwargs): return self._from_backing_data(result) result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) - if lib.is_scalar(result): - return self._box_func(result) - return self._from_backing_data(result) + return self._wrap_reduction_result(axis, result) def mean(self, *, skipna=True, axis: Optional[int] = 0): """ @@ -1357,9 +1353,7 @@ def mean(self, *, skipna=True, axis: Optional[int] = 0): result = nanops.nanmean( self._ndarray, axis=axis, skipna=skipna, mask=self.isna() ) - if axis is None or self.ndim == 1: - return self._box_func(result) - return self._from_backing_data(result) + return self._wrap_reduction_result(axis, result) def median(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): nv.validate_median((), kwargs) @@ -1378,9 +1372,7 @@ def median(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): return self._from_backing_data(result) result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) - if axis is None or self.ndim == 1: - return self._box_func(result) - return self._from_backing_data(result) + return self._wrap_reduction_result(axis, result) class DatelikeOps(DatetimeLikeArrayMixin): diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index e1a424b719a4a..20fae20c395e6 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -12,7 +12,6 @@ from pandas.core.dtypes.missing import isna from pandas.core import nanops, ops -from pandas.core.array_algos import masked_reductions from pandas.core.arraylike import OpsMixin from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.strings.object_array import ObjectStringArrayMixin @@ -273,39 +272,46 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, int]: def any(self, *, axis=None, out=None, keepdims=False, skipna=True): nv.validate_any((), dict(out=out, keepdims=keepdims)) - return nanops.nanany(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) def all(self, *, axis=None, out=None, keepdims=False, skipna=True): nv.validate_all((), dict(out=out, keepdims=keepdims)) - return nanops.nanall(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) - def min(self, *, skipna: bool = True, **kwargs) -> Scalar: + def min(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar: nv.validate_min((), kwargs) - return masked_reductions.min( - values=self.to_numpy(), mask=self.isna(), skipna=skipna + result = nanops.nanmin( + values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) + return self._wrap_reduction_result(axis, result) - def max(self, *, skipna: bool = True, **kwargs) -> Scalar: + def max(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar: nv.validate_max((), kwargs) - return masked_reductions.max( - values=self.to_numpy(), mask=self.isna(), skipna=skipna + result = nanops.nanmax( + values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) + return self._wrap_reduction_result(axis, result) def sum(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: nv.validate_sum((), kwargs) - return nanops.nansum( + result = nanops.nansum( self._ndarray, axis=axis, skipna=skipna, min_count=min_count ) + return self._wrap_reduction_result(axis, result) def prod(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: nv.validate_prod((), kwargs) - return nanops.nanprod( + result = nanops.nanprod( self._ndarray, axis=axis, skipna=skipna, min_count=min_count ) + return self._wrap_reduction_result(axis, result) def mean(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True): nv.validate_mean((), dict(dtype=dtype, out=out, keepdims=keepdims)) - return nanops.nanmean(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) def median( self, *, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True @@ -313,7 +319,8 @@ def median( nv.validate_median( (), dict(out=out, overwrite_input=overwrite_input, keepdims=keepdims) ) - return nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) def std( self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True @@ -321,7 +328,8 @@ def std( nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="std" ) - return nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + return self._wrap_reduction_result(axis, result) def var( self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True @@ -329,7 +337,8 @@ def var( nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="var" ) - return nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + return self._wrap_reduction_result(axis, result) def sem( self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True @@ -337,19 +346,22 @@ def sem( nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="sem" ) - return nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + return self._wrap_reduction_result(axis, result) def kurt(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True): nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="kurt" ) - return nanops.nankurt(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) def skew(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True): nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="skew" ) - return nanops.nanskew(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) # ------------------------------------------------------------------------ # Additional Methods diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 8231a5fa0509b..a51dd1098a359 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -3,6 +3,8 @@ import numpy as np from pandas._libs import lib, missing as libmissing +from pandas._typing import Scalar +from pandas.compat.numpy import function as nv from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype from pandas.core.dtypes.common import ( @@ -15,6 +17,7 @@ ) from pandas.core import ops +from pandas.core.array_algos import masked_reductions from pandas.core.arrays import IntegerArray, PandasArray from pandas.core.arrays.integer import _IntegerDtype from pandas.core.construction import extract_array @@ -301,6 +304,20 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs): raise TypeError(f"Cannot perform reduction '{name}' with string dtype") + def min(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_min((), kwargs) + result = masked_reductions.min( + values=self.to_numpy(), mask=self.isna(), skipna=skipna + ) + return self._wrap_reduction_result(axis, result) + + def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_max((), kwargs) + result = masked_reductions.max( + values=self.to_numpy(), mask=self.isna(), skipna=skipna + ) + return self._wrap_reduction_result(axis, result) + def value_counts(self, dropna=False): from pandas import value_counts diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 8a87df18b6adb..c227c071546ce 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -381,9 +381,7 @@ def sum( result = nanops.nansum( self._ndarray, axis=axis, skipna=skipna, min_count=min_count ) - if axis is None or self.ndim == 1: - return self._box_func(result) - return self._from_backing_data(result) + return self._wrap_reduction_result(axis, result) def std( self, diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 8e917bb770247..b045e789b52a8 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -344,6 +344,7 @@ def _wrap_results(result, dtype: DtypeObj, fill_value=None): assert not isna(fill_value), "Expected non-null fill_value" if result == fill_value: result = np.nan + if tz is not None: # we get here e.g. via nanmean when we call it on a DTA[tz] result = Timestamp(result, tz=tz)