Skip to content

REF: nanpercentile -> array_algos.quantile #44655

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 23, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 95 additions & 6 deletions pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@

import numpy as np

from pandas._libs import lib
from pandas._typing import (
ArrayLike,
Scalar,
npt,
)
from pandas.compat.numpy import np_percentile_argname

from pandas.core.dtypes.missing import (
isna,
na_value_for_dtype,
)

from pandas.core.nanops import nanpercentile


def quantile_compat(
values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
Expand Down Expand Up @@ -41,7 +42,7 @@ def quantile_compat(

def quantile_with_mask(
values: np.ndarray,
mask: np.ndarray,
mask: npt.NDArray[np.bool_],
fill_value,
qs: npt.NDArray[np.float64],
interpolation: str,
Expand Down Expand Up @@ -84,10 +85,9 @@ def quantile_with_mask(
flat = np.array([fill_value] * len(qs))
result = np.repeat(flat, len(values)).reshape(len(values), len(qs))
else:
# asarray needed for Sparse, see GH#24600
result = nanpercentile(
result = _nanpercentile(
values,
np.array(qs) * 100,
qs * 100.0,
na_value=fill_value,
mask=mask,
interpolation=interpolation,
Expand All @@ -97,3 +97,92 @@ def quantile_with_mask(
result = result.T

return result


def _nanpercentile_1d(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    qs: npt.NDArray[np.float64],
    na_value: Scalar,
    interpolation,
) -> Scalar | np.ndarray:
    """
    Wrapper for np.percentile that skips missing values, specialized to
    1-dimensional case.

    Parameters
    ----------
    values : array over which to find quantiles
    mask : ndarray[bool]
        locations in values that should be considered missing
    qs : np.ndarray[float64] of quantile indices to find
    na_value : scalar
        value to return for empty or all-null values
    interpolation : str
        forwarded to np.percentile under the keyword named by
        ``np_percentile_argname``

    Returns
    -------
    quantiles : scalar or array
        When no unmasked values remain, an array with one ``na_value``
        entry per element of ``qs``.
    """
    # Keep only the positions that are not flagged as missing.
    values = values[~mask]

    if len(values) == 0:
        fill = [na_value] * len(qs)
        try:
            # Preferred: keep the dtype of the input values.
            return np.array(fill, dtype=values.dtype)
        except (TypeError, ValueError):
            # na_value cannot be represented in values.dtype (e.g. NaN with
            # an integer dtype); let numpy infer a dtype that can hold it
            # instead of raising.
            return np.array(fill)

    return np.percentile(values, qs, **{np_percentile_argname: interpolation})


def _nanpercentile(
values: np.ndarray,
qs: npt.NDArray[np.float64],
*,
na_value,
mask: npt.NDArray[np.bool_],
interpolation,
):
"""
Wrapper for np.percentile that skips missing values.

Parameters
----------
values : np.ndarray[ndim=2] over which to find quantiles
qs : np.ndarray[float64] of quantile indices to find
na_value : scalar
value to return for empty or all-null values
mask : np.ndarray[bool]
locations in values that should be considered missing
interpolation : str

Returns
-------
quantiles : scalar or array
"""

if values.dtype.kind in ["m", "M"]:
# need to cast to integer to avoid rounding errors in numpy
result = _nanpercentile(
values.view("i8"),
qs=qs,
na_value=na_value.view("i8"),
mask=mask,
interpolation=interpolation,
)

# Note: we have to do `astype` and not view because in general we
# have float result at this point, not i8
return result.astype(values.dtype)

if not lib.is_scalar(mask) and mask.any():
# Caller is responsible for ensuring mask shape match
assert mask.shape == values.shape
result = [
_nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation)
for (val, m) in zip(list(values), list(mask))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: While refactoring: are the list calls necessary here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wondered the same thing and don't have a good answer.

]
result = np.array(result, dtype=values.dtype, copy=False).T
return result
else:
return np.percentile(
values, qs, axis=1, **{np_percentile_argname: interpolation}
)
90 changes: 0 additions & 90 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
npt,
)
from pandas.compat._optional import import_optional_dependency
from pandas.compat.numpy import np_percentile_argname

from pandas.core.dtypes.common import (
is_any_int_dtype,
Expand Down Expand Up @@ -1668,95 +1667,6 @@ def f(x, y):
nanne = make_nancomp(operator.ne)


def _nanpercentile_1d(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    q: np.ndarray,
    na_value: Scalar,
    interpolation,
) -> Scalar | np.ndarray:
    """
    Wrapper for np.percentile that skips missing values, specialized to
    1-dimensional case.

    Parameters
    ----------
    values : array over which to find quantiles
    mask : ndarray[bool]
        locations in values that should be considered missing
    q : np.ndarray[float64] of quantile indices to find
    na_value : scalar
        value to return for empty or all-null values
    interpolation : str
        forwarded to np.percentile under the keyword named by
        ``np_percentile_argname``

    Returns
    -------
    quantiles : scalar or array
        When no unmasked values remain, an array with one ``na_value``
        entry per element of ``q``.
    """
    # Keep only the positions that are not flagged as missing.
    values = values[~mask]

    if len(values) == 0:
        fill = [na_value] * len(q)
        try:
            # Preferred: keep the dtype of the input values.
            return np.array(fill, dtype=values.dtype)
        except (TypeError, ValueError):
            # na_value cannot be represented in values.dtype (e.g. NaN with
            # an integer dtype); let numpy infer a dtype that can hold it
            # instead of raising.
            return np.array(fill)

    return np.percentile(values, q, **{np_percentile_argname: interpolation})


def nanpercentile(
    values: np.ndarray,
    q: np.ndarray,
    *,
    na_value,
    mask: npt.NDArray[np.bool_],
    interpolation,
):
    """
    Wrapper for np.percentile that skips missing values.

    Parameters
    ----------
    values : np.ndarray[ndim=2] over which to find quantiles
    q : np.ndarray[float64] of quantile indices to find
    na_value : scalar
        value to return for empty or all-null values
    mask : ndarray[bool]
        locations in values that should be considered missing
    interpolation : str
        forwarded to np.percentile under the keyword named by
        ``np_percentile_argname``

    Returns
    -------
    quantiles : scalar or array
    """

    if values.dtype.kind in ["m", "M"]:
        # need to cast to integer to avoid rounding errors in numpy
        result = nanpercentile(
            values.view("i8"),
            q=q,
            na_value=na_value.view("i8"),
            mask=mask,
            interpolation=interpolation,
        )

        # Note: we have to do `astype` and not view because in general we
        # have float result at this point, not i8
        return result.astype(values.dtype)

    if not lib.is_scalar(mask) and mask.any():
        # Caller is responsible for ensuring mask shape match
        assert mask.shape == values.shape
        # Iterating a 2D ndarray already yields its rows, so values and mask
        # can be zipped directly without materializing lists first.
        result = [
            _nanpercentile_1d(val, m, q, na_value, interpolation=interpolation)
            for (val, m) in zip(values, mask)
        ]
        # np.asarray instead of np.array(..., copy=False): building an array
        # from a list always copies, and NumPy >= 2.0 raises ValueError for
        # copy=False whenever a copy cannot be avoided.
        return np.asarray(result, dtype=values.dtype).T

    # Nothing is masked: compute every row in a single vectorized call.
    return np.percentile(
        values, q, axis=1, **{np_percentile_argname: interpolation}
    )


def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
"""
Cumulative function with skipna support.
Expand Down