diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index db7289f7c3547..a80bd8ba76dac 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -27,11 +27,11 @@ F, Scalar, Shape, + npt, ) from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.common import ( - get_dtype, is_any_int_dtype, is_bool_dtype, is_complex, @@ -209,8 +209,8 @@ def _get_fill_value( def _maybe_get_mask( - values: np.ndarray, skipna: bool, mask: np.ndarray | None -) -> np.ndarray | None: + values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None +) -> npt.NDArray[np.bool_] | None: """ Compute a mask if and only if necessary. @@ -239,7 +239,7 @@ def _maybe_get_mask( Returns ------- - Optional[np.ndarray] + Optional[np.ndarray[bool]] """ if mask is None: if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype): @@ -257,8 +257,8 @@ def _get_values( skipna: bool, fill_value: Any = None, fill_value_typ: str | None = None, - mask: np.ndarray | None = None, -) -> tuple[np.ndarray, np.ndarray | None, np.dtype, np.dtype, Any]: + mask: npt.NDArray[np.bool_] | None = None, +) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None, np.dtype, np.dtype, Any]: """ Utility to get the values view, mask, dtype, dtype_max, and fill_value. @@ -279,7 +279,7 @@ def _get_values( value to fill NaNs with fill_value_typ : str Set to '+inf' or '-inf' to handle dtype-specific infinities - mask : Optional[np.ndarray] + mask : Optional[np.ndarray[bool]] nan-mask if known Returns @@ -396,7 +396,7 @@ def new_func( *, axis: int | None = None, skipna: bool = True, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, **kwargs, ): orig_values = values @@ -454,7 +454,7 @@ def nanany( *, axis: int | None = None, skipna: bool = True, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, ) -> bool: """ Check if any elements along an axis evaluate to True. @@ -500,7 +500,7 @@ def nanall( *, axis: int | None = None, skipna: bool = True, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, ) -> bool: """ Check if all elements along an axis evaluate to True. @@ -549,7 +549,7 @@ def nansum( axis: int | None = None, skipna: bool = True, min_count: int = 0, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, ) -> float: """ Sum the elements along an axis ignoring NaNs @@ -592,7 +592,7 @@ def nansum( def _mask_datetimelike_result( result: np.ndarray | np.datetime64 | np.timedelta64, axis: int | None, - mask: np.ndarray, + mask: npt.NDArray[np.bool_], orig_values: np.ndarray, ) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType: if isinstance(result, np.ndarray): @@ -616,7 +616,7 @@ def nanmean( *, axis: int | None = None, skipna: bool = True, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, ) -> float: """ Compute the mean of the element along an axis ignoring NaNs @@ -781,10 +781,10 @@ def get_empty_reduction_result( def _get_counts_nanvar( values_shape: Shape, - mask: np.ndarray | None, + mask: npt.NDArray[np.bool_] | None, axis: int | None, ddof: int, - dtype: Dtype = float, + dtype: np.dtype = np.dtype(np.float64), ) -> tuple[int | float | np.ndarray, int | float | np.ndarray]: """ Get the count of non-null values along an axis, accounting @@ -808,7 +808,6 @@ def _get_counts_nanvar( count : int, np.nan or np.ndarray d : int, np.nan or np.ndarray """ - dtype = get_dtype(dtype) count = _get_counts(values_shape, mask, axis, dtype=dtype) d = count - dtype.type(ddof) @@ -931,7 +930,7 @@ def nanvar(values, *, axis=None, skipna=True, ddof=1, mask=None): # unless we were dealing with a float array, in which case use the same # precision as the original values array. if is_float_dtype(dtype): - result = result.astype(dtype) + result = result.astype(dtype, copy=False) return result @@ -942,7 +941,7 @@ def nansem( axis: int | None = None, skipna: bool = True, ddof: int = 1, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, ) -> float: """ Compute the standard error in the mean along given axis while ignoring NaNs @@ -993,7 +992,7 @@ def reduction( *, axis: int | None = None, skipna: bool = True, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, ) -> Dtype: values, mask, dtype, dtype_max, fill_value = _get_values( @@ -1025,7 +1024,7 @@ def nanargmax( *, axis: int | None = None, skipna: bool = True, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, ) -> int | np.ndarray: """ Parameters @@ -1071,7 +1070,7 @@ def nanargmin( *, axis: int | None = None, skipna: bool = True, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, ) -> int | np.ndarray: """ Parameters @@ -1117,7 +1116,7 @@ def nanskew( *, axis: int | None = None, skipna: bool = True, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, ) -> float: """ Compute the sample skewness. @@ -1185,7 +1184,7 @@ def nanskew( dtype = values.dtype if is_float_dtype(dtype): - result = result.astype(dtype) + result = result.astype(dtype, copy=False) if isinstance(result, np.ndarray): result = np.where(m2 == 0, 0, result) @@ -1204,7 +1203,7 @@ def nankurt( *, axis: int | None = None, skipna: bool = True, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, ) -> float: """ Compute the sample excess kurtosis @@ -1285,7 +1284,7 @@ def nankurt( dtype = values.dtype if is_float_dtype(dtype): - result = result.astype(dtype) + result = result.astype(dtype, copy=False) if isinstance(result, np.ndarray): result = np.where(denominator == 0, 0, result) @@ -1301,7 +1300,7 @@ def nanprod( axis: int | None = None, skipna: bool = True, min_count: int = 0, - mask: np.ndarray | None = None, + mask: npt.NDArray[np.bool_] | None = None, ) -> float: """ Parameters @@ -1339,7 +1338,10 @@ def nanprod( def _maybe_arg_null_out( - result: np.ndarray, axis: int | None, mask: np.ndarray | None, skipna: bool + result: np.ndarray, + axis: int | None, + mask: npt.NDArray[np.bool_] | None, + skipna: bool, ) -> np.ndarray | int: # helper function for nanargmin/nanargmax if mask is None: @@ -1367,10 +1369,10 @@ def _maybe_arg_null_out( def _get_counts( - values_shape: tuple[int, ...], - mask: np.ndarray | None, + values_shape: Shape, + mask: npt.NDArray[np.bool_] | None, axis: int | None, - dtype: Dtype = float, + dtype: np.dtype = np.dtype(np.float64), ) -> int | float | np.ndarray: """ Get the count of non-null values along an axis @@ -1390,7 +1392,6 @@ def _get_counts( ------- count : scalar or array """ - dtype = get_dtype(dtype) if axis is None: if mask is not None: n = mask.size - mask.sum() @@ -1405,20 +1406,13 @@ def _get_counts( if is_scalar(count): return dtype.type(count) - try: - return count.astype(dtype) - except AttributeError: - # error: Argument "dtype" to "array" has incompatible type - # "Union[ExtensionDtype, dtype]"; expected "Union[dtype, None, type, - # _SupportsDtype, str, Tuple[Any, int], Tuple[Any, Union[int, - # Sequence[int]]], List[Any], _DtypeDict, Tuple[Any, Any]]" - return np.array(count, dtype=dtype) # type: ignore[arg-type] + return count.astype(dtype, copy=False) def _maybe_null_out( result: np.ndarray | float | NaTType, axis: int | None, - mask: np.ndarray | None, + mask: npt.NDArray[np.bool_] | None, shape: tuple[int, ...], min_count: int = 1, ) -> np.ndarray | float | NaTType: @@ -1455,7 +1449,7 @@ def _maybe_null_out( def check_below_min_count( - shape: tuple[int, ...], mask: np.ndarray | None, min_count: int + shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int ) -> bool: """ Check for the `min_count` keyword. Returns True if below `min_count` (when @@ -1465,7 +1459,7 @@ def check_below_min_count( ---------- shape : tuple The shape of the values (`values.shape`). - mask : ndarray or None + mask : ndarray[bool] or None Boolean numpy array (typically of same shape as `shape`) or None. min_count : int Keyword passed through from sum/prod call. @@ -1634,7 +1628,11 @@ def f(x, y): def _nanpercentile_1d( - values: np.ndarray, mask: np.ndarray, q: np.ndarray, na_value: Scalar, interpolation + values: np.ndarray, + mask: npt.NDArray[np.bool_], + q: np.ndarray, + na_value: Scalar, + interpolation, ) -> Scalar | np.ndarray: """ Wrapper for np.percentile that skips missing values, specialized to @@ -1668,7 +1666,7 @@ def nanpercentile( q: np.ndarray, *, na_value, - mask: np.ndarray, + mask: npt.NDArray[np.bool_], interpolation, ): """