TYP: Annotations in pandas/core/nanops.py #30461

Merged
merged 20 commits on Jan 20, 2020
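This PR adds type annotations to the reduction helpers in pandas/core/nanops.py. The recurring pattern in the diff below: array inputs become np.ndarray, axis becomes Optional[int], skipna becomes bool, masks become Optional[np.ndarray], and dtype arguments use the Dtype alias (with Scalar for scalar fill/return values), both imported from pandas._typing. A minimal sketch of that pattern follows; the function nansum_sketch and its body are illustrative only, not code from this PR:

from typing import Optional

import numpy as np

from pandas._typing import Scalar


def nansum_sketch(
    values: np.ndarray,
    axis: Optional[int] = None,
    skipna: bool = True,
    mask: Optional[np.ndarray] = None,
) -> Scalar:
    # Hypothetical reducer showing the annotation style only.
    if skipna and mask is None:
        mask = np.isnan(values)  # derive the NaN mask when the caller did not pass one
    if skipna:
        values = np.where(mask, 0, values)  # NaNs contribute the additive identity
    return values.sum(axis=axis)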
137 changes: 106 additions & 31 deletions pandas/core/nanops.py
@@ -8,6 +8,7 @@
from pandas._config import get_option

from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib
from pandas._typing import Dtype, Scalar
from pandas.compat._optional import import_optional_dependency

from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
@@ -37,7 +38,7 @@
_USE_BOTTLENECK = False


def set_use_bottleneck(v=True):
def set_use_bottleneck(v: bool = True) -> None:
# set/unset to use bottleneck
global _USE_BOTTLENECK
if _BOTTLENECK_INSTALLED:
@@ -93,7 +94,9 @@ def __call__(self, alt):
bn_func = None

@functools.wraps(alt)
def f(values, axis=None, skipna=True, **kwds):
def f(
values: np.ndarray, axis: Optional[int] = None, skipna: bool = True, **kwds
):
if len(self.kwargs) > 0:
for k, v in self.kwargs.items():
if k not in kwds:
@@ -129,10 +132,10 @@ def f(values, axis=None, skipna=True, **kwds):
return f


def _bn_ok_dtype(dt, name: str) -> bool:
def _bn_ok_dtype(dtype: Dtype, name: str) -> bool:
# Bottleneck chokes on datetime64
if not is_object_dtype(dt) and not (
is_datetime_or_timedelta_dtype(dt) or is_datetime64tz_dtype(dt)
if not is_object_dtype(dtype) and not (
is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype)
):

# GH 15507
@@ -163,7 +166,9 @@ def _has_infs(result) -> bool:
return False


def _get_fill_value(dtype, fill_value=None, fill_value_typ=None):
def _get_fill_value(
dtype: Dtype, fill_value: Optional[Scalar] = None, fill_value_typ=None
):
""" return the correct fill value for the dtype of the values """
if fill_value is not None:
return fill_value
@@ -326,12 +331,12 @@ def _get_values(
return values, mask, dtype, dtype_max, fill_value


def _na_ok_dtype(dtype):
def _na_ok_dtype(dtype) -> bool:
# TODO: what about datetime64tz? PeriodDtype?
return not issubclass(dtype.type, (np.integer, np.timedelta64, np.datetime64))


def _wrap_results(result, dtype, fill_value=None):
def _wrap_results(result, dtype: Dtype, fill_value=None):
""" wrap our results if needed """

if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
@@ -362,7 +367,9 @@ def _wrap_results(result, dtype, fill_value=None):
return result


def _na_for_min_count(values, axis: Optional[int]):
def _na_for_min_count(
values: np.ndarray, axis: Optional[int]
) -> Union[Scalar, np.ndarray]:
"""
Return the missing value for `values`.

@@ -393,7 +400,12 @@ def _na_for_min_count(values, axis: Optional[int]):
return result


def nanany(values, axis=None, skipna: bool = True, mask=None):
def nanany(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> bool:
"""
Check if any elements along an axis evaluate to True.

@@ -425,7 +437,12 @@ def nanany(values, axis=None, skipna: bool = True, mask=None):
return values.any(axis)
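As a usage note (mine, not part of the diff): with the new signature, nanany can be exercised directly on an ndarray, with or without a precomputed mask. A small, hedged example with made-up data:

import numpy as np
import pandas.core.nanops as nanops

arr = np.array([0.0, np.nan, 2.0])
mask = np.isnan(arr)  # callers may pass a precomputed mask
print(nanops.nanany(arr, skipna=True))             # True: 2.0 is truthy once NaN is skipped
print(nanops.nanany(arr, skipna=True, mask=mask))  # same result with an explicit mask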


def nanall(values, axis=None, skipna: bool = True, mask=None):
def nanall(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> bool:
"""
Check if all elements along an axis evaluate to True.

@@ -458,7 +475,13 @@ def nanall(values, axis=None, skipna: bool = True, mask=None):


@disallow("M8")
def nansum(values, axis=None, skipna=True, min_count=0, mask=None):
def nansum(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
min_count: int = 0,
mask: Optional[np.ndarray] = None,
) -> float:
"""
Sum the elements along an axis ignoring NaNs

@@ -629,7 +652,7 @@ def _get_counts_nanvar(
mask: Optional[np.ndarray],
axis: Optional[int],
ddof: int,
dtype=float,
dtype: Dtype = float,
) -> Tuple[Union[int, np.ndarray], Union[int, np.ndarray]]:
""" Get the count of non-null values along an axis, accounting
for degrees of freedom.
@@ -776,7 +799,13 @@ def nanvar(values, axis=None, skipna=True, ddof=1, mask=None):


@disallow("M8", "m8")
def nansem(values, axis=None, skipna=True, ddof=1, mask=None):
def nansem(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
ddof: int = 1,
mask: Optional[np.ndarray] = None,
) -> float:
"""
Compute the standard error in the mean along given axis while ignoring NaNs

@@ -821,7 +850,12 @@ def nansem(values, axis=None, skipna=True, ddof=1, mask=None):

def _nanminmax(meth, fill_value_typ):
@bottleneck_switch(name="nan" + meth)
def reduction(values, axis=None, skipna=True, mask=None):
def reduction(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> Dtype:

values, mask, dtype, dtype_max, fill_value = _get_values(
values, skipna, fill_value_typ=fill_value_typ, mask=mask
@@ -847,7 +881,12 @@ def reduction(values, axis=None, skipna=True, mask=None):


@disallow("O")
def nanargmax(values, axis=None, skipna=True, mask=None):
def nanargmax(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> int:
"""
Parameters
----------
@@ -878,7 +917,12 @@ def nanargmax(values, axis=None, skipna=True, mask=None):


@disallow("O")
def nanargmin(values, axis=None, skipna=True, mask=None):
def nanargmin(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> int:
"""
Parameters
----------
@@ -909,7 +953,12 @@ def nanargmin(values, axis=None, skipna=True, mask=None):


@disallow("M8", "m8")
def nanskew(values, axis=None, skipna=True, mask=None):
def nanskew(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> float:
""" Compute the sample skewness.

The statistic computed here is the adjusted Fisher-Pearson standardized
@@ -987,7 +1036,12 @@ def nanskew(values, axis=None, skipna=True, mask=None):


@disallow("M8", "m8")
def nankurt(values, axis=None, skipna=True, mask=None):
def nankurt(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
mask: Optional[np.ndarray] = None,
) -> float:
"""
Compute the sample excess kurtosis

@@ -1075,7 +1129,13 @@ def nankurt(values, axis=None, skipna=True, mask=None):


@disallow("M8", "m8")
def nanprod(values, axis=None, skipna=True, min_count=0, mask=None):
def nanprod(
values: np.ndarray,
axis: Optional[int] = None,
skipna: bool = True,
min_count: int = 0,
mask: Optional[np.ndarray] = None,
) -> float:
"""
Parameters
----------
@@ -1088,18 +1148,15 @@ def nanprod(values, axis=None, skipna=True, min_count=0, mask=None):

Returns
-------
result : dtype
Dtype
The product of all elements on a given axis. ( NaNs are treated as 1)

Examples
--------
>>> import pandas.core.nanops as nanops
>>> s = pd.Series([1, 2, 3, np.nan])
>>> nanops.nanprod(s)
6.0

Returns
-------
The product of all elements on a given axis. ( NaNs are treated as 1)
"""
mask = _maybe_get_mask(values, skipna, mask)

@@ -1138,7 +1195,7 @@ def _get_counts(
values_shape: Tuple[int],
mask: Optional[np.ndarray],
axis: Optional[int],
dtype=float,
dtype: Dtype = float,
) -> Union[int, np.ndarray]:
""" Get the count of non-null values along an axis

@@ -1184,7 +1241,13 @@ def _maybe_null_out(
mask: Optional[np.ndarray],
shape: Tuple,
min_count: int = 1,
) -> np.ndarray:
) -> float:
"""
Returns
-------
Dtype
The product of all elements on a given axis. ( NaNs are treated as 1)
"""
if mask is not None and axis is not None and getattr(result, "ndim", False):
null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
if np.any(null_mask):
@@ -1218,7 +1281,9 @@ def _zero_out_fperr(arg):


@disallow("M8", "m8")
def nancorr(a, b, method="pearson", min_periods=None):
def nancorr(
a: np.ndarray, b: np.ndarray, method="pearson", min_periods: Optional[int] = None,
):
"""
a, b: ndarrays
"""
@@ -1268,7 +1333,7 @@ def _spearman(a, b):


@disallow("M8", "m8")
def nancov(a, b, min_periods=None):
def nancov(a: np.ndarray, b: np.ndarray, min_periods: Optional[int] = None):
if len(a) != len(b):
raise AssertionError("Operands to nancov must have same size")

@@ -1341,7 +1406,9 @@ def f(x, y):
nanne = make_nancomp(operator.ne)


def _nanpercentile_1d(values, mask, q, na_value, interpolation):
def _nanpercentile_1d(
values: np.ndarray, mask: np.ndarray, q, na_value: Scalar, interpolation
) -> Union[Scalar, np.ndarray]:
"""
Wrapper for np.percentile that skips missing values, specialized to
1-dimensional case.
@@ -1372,7 +1439,15 @@ def _nanpercentile_1d(values, mask, q, na_value, interpolation):
return np.percentile(values, q, interpolation=interpolation)
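For intuition (not part of the PR): when at least one value is unmasked, _nanpercentile_1d is roughly equivalent to dropping the masked entries before delegating to np.percentile. A hedged sketch with made-up data:

import numpy as np

values = np.array([1.0, np.nan, 3.0, 4.0])
mask = np.isnan(values)
# roughly what _nanpercentile_1d(values, mask, q=50, na_value=np.nan,
# interpolation="linear") computes for this input
result = np.percentile(values[~mask], 50, interpolation="linear")
print(result)  # 3.0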


def nanpercentile(values, q, axis, na_value, mask, ndim, interpolation):
def nanpercentile(
values: np.ndarray,
q,
axis: int,
na_value,
mask: np.ndarray,
ndim: int,
interpolation,
):
"""
Wrapper for np.percentile that skips missing values.
