TYP: nanops (#43264)

jbrockmendel · web-flow · commit d15a21059d32 · 2021-08-30T08:50:06.000+01:00
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -27,11 +27,11 @@
     F,
     Scalar,
     Shape,
+    npt,
 )
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.core.dtypes.common import (
-    get_dtype,
     is_any_int_dtype,
     is_bool_dtype,
     is_complex,
@@ -209,8 +209,8 @@ def _get_fill_value(
 
 
 def _maybe_get_mask(
-    values: np.ndarray, skipna: bool, mask: np.ndarray | None
-) -> np.ndarray | None:
+    values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None
+) -> npt.NDArray[np.bool_] | None:
     """
     Compute a mask if and only if necessary.
 
@@ -239,7 +239,7 @@ def _maybe_get_mask(
 
     Returns
     -------
-    Optional[np.ndarray]
+    Optional[np.ndarray[bool]]
     """
     if mask is None:
         if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype):
@@ -257,8 +257,8 @@ def _get_values(
     skipna: bool,
     fill_value: Any = None,
     fill_value_typ: str | None = None,
-    mask: np.ndarray | None = None,
-) -> tuple[np.ndarray, np.ndarray | None, np.dtype, np.dtype, Any]:
+    mask: npt.NDArray[np.bool_] | None = None,
+) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None, np.dtype, np.dtype, Any]:
     """
     Utility to get the values view, mask, dtype, dtype_max, and fill_value.
 
@@ -279,7 +279,7 @@ def _get_values(
         value to fill NaNs with
     fill_value_typ : str
         Set to '+inf' or '-inf' to handle dtype-specific infinities
-    mask : Optional[np.ndarray]
+    mask : Optional[np.ndarray[bool]]
         nan-mask if known
 
     Returns
@@ -396,7 +396,7 @@ def new_func(
         *,
         axis: int | None = None,
         skipna: bool = True,
-        mask: np.ndarray | None = None,
+        mask: npt.NDArray[np.bool_] | None = None,
         **kwargs,
     ):
         orig_values = values
@@ -454,7 +454,7 @@ def nanany(
     *,
     axis: int | None = None,
     skipna: bool = True,
-    mask: np.ndarray | None = None,
+    mask: npt.NDArray[np.bool_] | None = None,
 ) -> bool:
     """
     Check if any elements along an axis evaluate to True.
@@ -500,7 +500,7 @@ def nanall(
     *,
     axis: int | None = None,
     skipna: bool = True,
-    mask: np.ndarray | None = None,
+    mask: npt.NDArray[np.bool_] | None = None,
 ) -> bool:
     """
     Check if all elements along an axis evaluate to True.
@@ -549,7 +549,7 @@ def nansum(
     axis: int | None = None,
     skipna: bool = True,
     min_count: int = 0,
-    mask: np.ndarray | None = None,
+    mask: npt.NDArray[np.bool_] | None = None,
 ) -> float:
     """
     Sum the elements along an axis ignoring NaNs
@@ -592,7 +592,7 @@ def nansum(
 def _mask_datetimelike_result(
     result: np.ndarray | np.datetime64 | np.timedelta64,
     axis: int | None,
-    mask: np.ndarray,
+    mask: npt.NDArray[np.bool_],
     orig_values: np.ndarray,
 ) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType:
     if isinstance(result, np.ndarray):
@@ -616,7 +616,7 @@ def nanmean(
     *,
     axis: int | None = None,
     skipna: bool = True,
-    mask: np.ndarray | None = None,
+    mask: npt.NDArray[np.bool_] | None = None,
 ) -> float:
     """
     Compute the mean of the element along an axis ignoring NaNs
@@ -781,10 +781,10 @@ def get_empty_reduction_result(
 
 def _get_counts_nanvar(
     values_shape: Shape,
-    mask: np.ndarray | None,
+    mask: npt.NDArray[np.bool_] | None,
     axis: int | None,
     ddof: int,
-    dtype: Dtype = float,
+    dtype: np.dtype = np.dtype(np.float64),
 ) -> tuple[int | float | np.ndarray, int | float | np.ndarray]:
     """
     Get the count of non-null values along an axis, accounting
@@ -808,7 +808,6 @@ def _get_counts_nanvar(
     count : int, np.nan or np.ndarray
     d : int, np.nan or np.ndarray
     """
-    dtype = get_dtype(dtype)
     count = _get_counts(values_shape, mask, axis, dtype=dtype)
     d = count - dtype.type(ddof)
 
@@ -931,7 +930,7 @@ def nanvar(values, *, axis=None, skipna=True, ddof=1, mask=None):
     # unless we were dealing with a float array, in which case use the same
     # precision as the original values array.
     if is_float_dtype(dtype):
-        result = result.astype(dtype)
+        result = result.astype(dtype, copy=False)
     return result
 
 
@@ -942,7 +941,7 @@ def nansem(
     axis: int | None = None,
     skipna: bool = True,
     ddof: int = 1,
-    mask: np.ndarray | None = None,
+    mask: npt.NDArray[np.bool_] | None = None,
 ) -> float:
     """
     Compute the standard error in the mean along given axis while ignoring NaNs
@@ -993,7 +992,7 @@ def reduction(
         *,
         axis: int | None = None,
         skipna: bool = True,
-        mask: np.ndarray | None = None,
+        mask: npt.NDArray[np.bool_] | None = None,
     ) -> Dtype:
 
         values, mask, dtype, dtype_max, fill_value = _get_values(
@@ -1025,7 +1024,7 @@ def nanargmax(
     *,
     axis: int | None = None,
     skipna: bool = True,
-    mask: np.ndarray | None = None,
+    mask: npt.NDArray[np.bool_] | None = None,
 ) -> int | np.ndarray:
     """
     Parameters
@@ -1071,7 +1070,7 @@ def nanargmin(
     *,
     axis: int | None = None,
     skipna: bool = True,
-    mask: np.ndarray | None = None,
+    mask: npt.NDArray[np.bool_] | None = None,
 ) -> int | np.ndarray:
     """
     Parameters
@@ -1117,7 +1116,7 @@ def nanskew(
     *,
     axis: int | None = None,
     skipna: bool = True,
-    mask: np.ndarray | None = None,
+    mask: npt.NDArray[np.bool_] | None = None,
 ) -> float:
     """
     Compute the sample skewness.
@@ -1185,7 +1184,7 @@ def nanskew(
 
     dtype = values.dtype
     if is_float_dtype(dtype):
-        result = result.astype(dtype)
+        result = result.astype(dtype, copy=False)
 
     if isinstance(result, np.ndarray):
         result = np.where(m2 == 0, 0, result)
@@ -1204,7 +1203,7 @@ def nankurt(
     *,
     axis: int | None = None,
     skipna: bool = True,
-    mask: np.ndarray | None = None,
+    mask: npt.NDArray[np.bool_] | None = None,
 ) -> float:
     """
     Compute the sample excess kurtosis
@@ -1285,7 +1284,7 @@ def nankurt(
 
     dtype = values.dtype
     if is_float_dtype(dtype):
-        result = result.astype(dtype)
+        result = result.astype(dtype, copy=False)
 
     if isinstance(result, np.ndarray):
         result = np.where(denominator == 0, 0, result)
@@ -1301,7 +1300,7 @@ def nanprod(
     axis: int | None = None,
     skipna: bool = True,
     min_count: int = 0,
-    mask: np.ndarray | None = None,
+    mask: npt.NDArray[np.bool_] | None = None,
 ) -> float:
     """
     Parameters
@@ -1339,7 +1338,10 @@ def nanprod(
 
 
 def _maybe_arg_null_out(
-    result: np.ndarray, axis: int | None, mask: np.ndarray | None, skipna: bool
+    result: np.ndarray,
+    axis: int | None,
+    mask: npt.NDArray[np.bool_] | None,
+    skipna: bool,
 ) -> np.ndarray | int:
     # helper function for nanargmin/nanargmax
     if mask is None:
@@ -1367,10 +1369,10 @@ def _maybe_arg_null_out(
 
 
 def _get_counts(
-    values_shape: tuple[int, ...],
-    mask: np.ndarray | None,
+    values_shape: Shape,
+    mask: npt.NDArray[np.bool_] | None,
     axis: int | None,
-    dtype: Dtype = float,
+    dtype: np.dtype = np.dtype(np.float64),
 ) -> int | float | np.ndarray:
     """
     Get the count of non-null values along an axis
@@ -1390,7 +1392,6 @@ def _get_counts(
     -------
     count : scalar or array
     """
-    dtype = get_dtype(dtype)
     if axis is None:
         if mask is not None:
             n = mask.size - mask.sum()
@@ -1405,20 +1406,13 @@ def _get_counts(
 
     if is_scalar(count):
         return dtype.type(count)
-    try:
-        return count.astype(dtype)
-    except AttributeError:
-        # error: Argument "dtype" to "array" has incompatible type
-        # "Union[ExtensionDtype, dtype]"; expected "Union[dtype, None, type,
-        # _SupportsDtype, str, Tuple[Any, int], Tuple[Any, Union[int,
-        # Sequence[int]]], List[Any], _DtypeDict, Tuple[Any, Any]]"
-        return np.array(count, dtype=dtype)  # type: ignore[arg-type]
+    return count.astype(dtype, copy=False)
 
 
 def _maybe_null_out(
     result: np.ndarray | float | NaTType,
     axis: int | None,
-    mask: np.ndarray | None,
+    mask: npt.NDArray[np.bool_] | None,
     shape: tuple[int, ...],
     min_count: int = 1,
 ) -> np.ndarray | float | NaTType:
@@ -1455,7 +1449,7 @@ def _maybe_null_out(
 
 
 def check_below_min_count(
-    shape: tuple[int, ...], mask: np.ndarray | None, min_count: int
+    shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int
 ) -> bool:
     """
     Check for the `min_count` keyword. Returns True if below `min_count` (when
@@ -1465,7 +1459,7 @@ def check_below_min_count(
     ----------
     shape : tuple
         The shape of the values (`values.shape`).
-    mask : ndarray or None
+    mask : ndarray[bool] or None
         Boolean numpy array (typically of same shape as `shape`) or None.
     min_count : int
         Keyword passed through from sum/prod call.
@@ -1634,7 +1628,11 @@ def f(x, y):
 
 
 def _nanpercentile_1d(
-    values: np.ndarray, mask: np.ndarray, q: np.ndarray, na_value: Scalar, interpolation
+    values: np.ndarray,
+    mask: npt.NDArray[np.bool_],
+    q: np.ndarray,
+    na_value: Scalar,
+    interpolation,
 ) -> Scalar | np.ndarray:
     """
     Wrapper for np.percentile that skips missing values, specialized to
@@ -1668,7 +1666,7 @@ def nanpercentile(
     q: np.ndarray,
     *,
     na_value,
-    mask: np.ndarray,
+    mask: npt.NDArray[np.bool_],
     interpolation,
 ):
     """