Skip to content

TYP: nanops #43264

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit on Aug 30, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 42 additions & 44 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@
F,
Scalar,
Shape,
npt,
)
from pandas.compat._optional import import_optional_dependency

from pandas.core.dtypes.common import (
get_dtype,
is_any_int_dtype,
is_bool_dtype,
is_complex,
Expand Down Expand Up @@ -209,8 +209,8 @@ def _get_fill_value(


def _maybe_get_mask(
values: np.ndarray, skipna: bool, mask: np.ndarray | None
) -> np.ndarray | None:
values: np.ndarray, skipna: bool, mask: npt.NDArray[np.bool_] | None
) -> npt.NDArray[np.bool_] | None:
"""
Compute a mask if and only if necessary.

Expand Down Expand Up @@ -239,7 +239,7 @@ def _maybe_get_mask(

Returns
-------
Optional[np.ndarray]
Optional[np.ndarray[bool]]
"""
if mask is None:
if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype):
Expand All @@ -257,8 +257,8 @@ def _get_values(
skipna: bool,
fill_value: Any = None,
fill_value_typ: str | None = None,
mask: np.ndarray | None = None,
) -> tuple[np.ndarray, np.ndarray | None, np.dtype, np.dtype, Any]:
mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_] | None, np.dtype, np.dtype, Any]:
"""
Utility to get the values view, mask, dtype, dtype_max, and fill_value.

Expand All @@ -279,7 +279,7 @@ def _get_values(
value to fill NaNs with
fill_value_typ : str
Set to '+inf' or '-inf' to handle dtype-specific infinities
mask : Optional[np.ndarray]
mask : Optional[np.ndarray[bool]]
nan-mask if known

Returns
Expand Down Expand Up @@ -396,7 +396,7 @@ def new_func(
*,
axis: int | None = None,
skipna: bool = True,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
**kwargs,
):
orig_values = values
Expand Down Expand Up @@ -454,7 +454,7 @@ def nanany(
*,
axis: int | None = None,
skipna: bool = True,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
"""
Check if any elements along an axis evaluate to True.
Expand Down Expand Up @@ -500,7 +500,7 @@ def nanall(
*,
axis: int | None = None,
skipna: bool = True,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
) -> bool:
"""
Check if all elements along an axis evaluate to True.
Expand Down Expand Up @@ -549,7 +549,7 @@ def nansum(
axis: int | None = None,
skipna: bool = True,
min_count: int = 0,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
) -> float:
"""
Sum the elements along an axis ignoring NaNs
Expand Down Expand Up @@ -592,7 +592,7 @@ def nansum(
def _mask_datetimelike_result(
result: np.ndarray | np.datetime64 | np.timedelta64,
axis: int | None,
mask: np.ndarray,
mask: npt.NDArray[np.bool_],
orig_values: np.ndarray,
) -> np.ndarray | np.datetime64 | np.timedelta64 | NaTType:
if isinstance(result, np.ndarray):
Expand All @@ -616,7 +616,7 @@ def nanmean(
*,
axis: int | None = None,
skipna: bool = True,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
) -> float:
"""
Compute the mean of the elements along an axis ignoring NaNs
Expand Down Expand Up @@ -781,10 +781,10 @@ def get_empty_reduction_result(

def _get_counts_nanvar(
values_shape: Shape,
mask: np.ndarray | None,
mask: npt.NDArray[np.bool_] | None,
axis: int | None,
ddof: int,
dtype: Dtype = float,
dtype: np.dtype = np.dtype(np.float64),
) -> tuple[int | float | np.ndarray, int | float | np.ndarray]:
"""
Get the count of non-null values along an axis, accounting
Expand All @@ -808,7 +808,6 @@ def _get_counts_nanvar(
count : int, np.nan or np.ndarray
d : int, np.nan or np.ndarray
"""
dtype = get_dtype(dtype)
count = _get_counts(values_shape, mask, axis, dtype=dtype)
d = count - dtype.type(ddof)

Expand Down Expand Up @@ -931,7 +930,7 @@ def nanvar(values, *, axis=None, skipna=True, ddof=1, mask=None):
# unless we were dealing with a float array, in which case use the same
# precision as the original values array.
if is_float_dtype(dtype):
result = result.astype(dtype)
result = result.astype(dtype, copy=False)
return result


Expand All @@ -942,7 +941,7 @@ def nansem(
axis: int | None = None,
skipna: bool = True,
ddof: int = 1,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
) -> float:
"""
Compute the standard error in the mean along given axis while ignoring NaNs
Expand Down Expand Up @@ -993,7 +992,7 @@ def reduction(
*,
axis: int | None = None,
skipna: bool = True,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
) -> Dtype:

values, mask, dtype, dtype_max, fill_value = _get_values(
Expand Down Expand Up @@ -1025,7 +1024,7 @@ def nanargmax(
*,
axis: int | None = None,
skipna: bool = True,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
"""
Parameters
Expand Down Expand Up @@ -1071,7 +1070,7 @@ def nanargmin(
*,
axis: int | None = None,
skipna: bool = True,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
) -> int | np.ndarray:
"""
Parameters
Expand Down Expand Up @@ -1117,7 +1116,7 @@ def nanskew(
*,
axis: int | None = None,
skipna: bool = True,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
) -> float:
"""
Compute the sample skewness.
Expand Down Expand Up @@ -1185,7 +1184,7 @@ def nanskew(

dtype = values.dtype
if is_float_dtype(dtype):
result = result.astype(dtype)
result = result.astype(dtype, copy=False)

if isinstance(result, np.ndarray):
result = np.where(m2 == 0, 0, result)
Expand All @@ -1204,7 +1203,7 @@ def nankurt(
*,
axis: int | None = None,
skipna: bool = True,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
) -> float:
"""
Compute the sample excess kurtosis
Expand Down Expand Up @@ -1285,7 +1284,7 @@ def nankurt(

dtype = values.dtype
if is_float_dtype(dtype):
result = result.astype(dtype)
result = result.astype(dtype, copy=False)

if isinstance(result, np.ndarray):
result = np.where(denominator == 0, 0, result)
Expand All @@ -1301,7 +1300,7 @@ def nanprod(
axis: int | None = None,
skipna: bool = True,
min_count: int = 0,
mask: np.ndarray | None = None,
mask: npt.NDArray[np.bool_] | None = None,
) -> float:
"""
Parameters
Expand Down Expand Up @@ -1339,7 +1338,10 @@ def nanprod(


def _maybe_arg_null_out(
result: np.ndarray, axis: int | None, mask: np.ndarray | None, skipna: bool
result: np.ndarray,
axis: int | None,
mask: npt.NDArray[np.bool_] | None,
skipna: bool,
) -> np.ndarray | int:
# helper function for nanargmin/nanargmax
if mask is None:
Expand Down Expand Up @@ -1367,10 +1369,10 @@ def _maybe_arg_null_out(


def _get_counts(
values_shape: tuple[int, ...],
mask: np.ndarray | None,
values_shape: Shape,
mask: npt.NDArray[np.bool_] | None,
axis: int | None,
dtype: Dtype = float,
dtype: np.dtype = np.dtype(np.float64),
) -> int | float | np.ndarray:
"""
Get the count of non-null values along an axis
Expand All @@ -1390,7 +1392,6 @@ def _get_counts(
-------
count : scalar or array
"""
dtype = get_dtype(dtype)
if axis is None:
if mask is not None:
n = mask.size - mask.sum()
Expand All @@ -1405,20 +1406,13 @@ def _get_counts(

if is_scalar(count):
return dtype.type(count)
try:
return count.astype(dtype)
except AttributeError:
# error: Argument "dtype" to "array" has incompatible type
# "Union[ExtensionDtype, dtype]"; expected "Union[dtype, None, type,
# _SupportsDtype, str, Tuple[Any, int], Tuple[Any, Union[int,
# Sequence[int]]], List[Any], _DtypeDict, Tuple[Any, Any]]"
return np.array(count, dtype=dtype) # type: ignore[arg-type]
return count.astype(dtype, copy=False)


def _maybe_null_out(
result: np.ndarray | float | NaTType,
axis: int | None,
mask: np.ndarray | None,
mask: npt.NDArray[np.bool_] | None,
shape: tuple[int, ...],
min_count: int = 1,
) -> np.ndarray | float | NaTType:
Expand Down Expand Up @@ -1455,7 +1449,7 @@ def _maybe_null_out(


def check_below_min_count(
shape: tuple[int, ...], mask: np.ndarray | None, min_count: int
shape: tuple[int, ...], mask: npt.NDArray[np.bool_] | None, min_count: int
) -> bool:
"""
Check for the `min_count` keyword. Returns True if below `min_count` (when
Expand All @@ -1465,7 +1459,7 @@ def check_below_min_count(
----------
shape : tuple
The shape of the values (`values.shape`).
mask : ndarray or None
mask : ndarray[bool] or None
Boolean numpy array (typically of same shape as `shape`) or None.
min_count : int
Keyword passed through from sum/prod call.
Expand Down Expand Up @@ -1634,7 +1628,11 @@ def f(x, y):


def _nanpercentile_1d(
values: np.ndarray, mask: np.ndarray, q: np.ndarray, na_value: Scalar, interpolation
values: np.ndarray,
mask: npt.NDArray[np.bool_],
q: np.ndarray,
na_value: Scalar,
interpolation,
) -> Scalar | np.ndarray:
"""
Wrapper for np.percentile that skips missing values, specialized to
Expand Down Expand Up @@ -1668,7 +1666,7 @@ def nanpercentile(
q: np.ndarray,
*,
na_value,
mask: np.ndarray,
mask: npt.NDArray[np.bool_],
interpolation,
):
"""
Expand Down