Skip to content

Commit de4d74d

Browse files
authored
REF: nanpercentile -> array_algos.quantile (#44655)
1 parent a3f3762 commit de4d74d

File tree

2 files changed

+95
-96
lines changed

2 files changed

+95
-96
lines changed

pandas/core/array_algos/quantile.py

+95-6
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,19 @@
22

33
import numpy as np
44

5+
from pandas._libs import lib
56
from pandas._typing import (
67
ArrayLike,
8+
Scalar,
79
npt,
810
)
11+
from pandas.compat.numpy import np_percentile_argname
912

1013
from pandas.core.dtypes.missing import (
1114
isna,
1215
na_value_for_dtype,
1316
)
1417

15-
from pandas.core.nanops import nanpercentile
16-
1718

1819
def quantile_compat(
1920
values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
@@ -41,7 +42,7 @@ def quantile_compat(
4142

4243
def quantile_with_mask(
4344
values: np.ndarray,
44-
mask: np.ndarray,
45+
mask: npt.NDArray[np.bool_],
4546
fill_value,
4647
qs: npt.NDArray[np.float64],
4748
interpolation: str,
@@ -84,10 +85,9 @@ def quantile_with_mask(
8485
flat = np.array([fill_value] * len(qs))
8586
result = np.repeat(flat, len(values)).reshape(len(values), len(qs))
8687
else:
87-
# asarray needed for Sparse, see GH#24600
88-
result = nanpercentile(
88+
result = _nanpercentile(
8989
values,
90-
np.array(qs) * 100,
90+
qs * 100.0,
9191
na_value=fill_value,
9292
mask=mask,
9393
interpolation=interpolation,
@@ -97,3 +97,92 @@ def quantile_with_mask(
9797
result = result.T
9898

9999
return result
100+
101+
102+
def _nanpercentile_1d(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    qs: npt.NDArray[np.float64],
    na_value: Scalar,
    interpolation,
) -> Scalar | np.ndarray:
    """
    Compute percentiles of a 1-dimensional array, ignoring masked entries.

    Thin wrapper around np.percentile that drops the positions flagged in
    ``mask`` before computing.

    Parameters
    ----------
    values : np.ndarray
        1-dimensional array over which to find percentiles.
    mask : np.ndarray[bool]
        True at the locations in ``values`` that should be treated as missing.
    qs : np.ndarray[float64]
        Percentiles to compute; passed to np.percentile unchanged.
    na_value : scalar
        Value used to fill the result when no unmasked values remain.
    interpolation : str
        Forwarded to np.percentile under the keyword named by
        ``np_percentile_argname``.

    Returns
    -------
    quantiles : scalar or array
    """
    unmasked = values[~mask]

    if len(unmasked) != 0:
        percentile_kwargs = {np_percentile_argname: interpolation}
        return np.percentile(unmasked, qs, **percentile_kwargs)

    # Nothing survived the mask: one na_value per requested percentile,
    # keeping the dtype of the input.
    filler = [na_value for _ in range(len(qs))]
    return np.array(filler, dtype=unmasked.dtype)
134+
135+
136+
def _nanpercentile(
    values: np.ndarray,
    qs: npt.NDArray[np.float64],
    *,
    na_value,
    mask: npt.NDArray[np.bool_],
    interpolation,
):
    """
    Wrapper for np.percentile that skips missing values.

    Parameters
    ----------
    values : np.ndarray[ndim=2] over which to find quantiles
    qs : np.ndarray[float64] of quantile indices to find
        Passed to np.percentile unchanged.
    na_value : scalar
        value to return for empty or all-null values
    mask : np.ndarray[bool]
        locations in values that should be considered missing
    interpolation : str
        Forwarded to np.percentile under the keyword named by
        ``np_percentile_argname``.

    Returns
    -------
    quantiles : scalar or array
    """
    if values.dtype.kind in "mM":
        # need to cast to integer to avoid rounding errors in numpy
        result = _nanpercentile(
            values.view("i8"),
            qs=qs,
            na_value=na_value.view("i8"),
            mask=mask,
            interpolation=interpolation,
        )

        # Note: we have to do `astype` and not view because in general we
        #  have float result at this point, not i8
        return result.astype(values.dtype)

    if not lib.is_scalar(mask) and mask.any():
        # Caller is responsible for ensuring mask shape match
        assert mask.shape == values.shape
        # Rows can have different numbers of valid entries, so each row is
        # handled separately; iterating 2D arrays yields their rows directly.
        per_row = [
            _nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation)
            for (val, m) in zip(values, mask)
        ]
        # np.asarray rather than np.array(..., copy=False): building an array
        # from a list always copies, and NumPy >= 2.0 raises ValueError when
        # copy=False cannot be honoured.
        return np.asarray(per_row, dtype=values.dtype).T

    # No missing values: a single vectorised call over the rows suffices.
    return np.percentile(
        values, qs, axis=1, **{np_percentile_argname: interpolation}
    )

pandas/core/nanops.py

-90
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
npt,
3131
)
3232
from pandas.compat._optional import import_optional_dependency
33-
from pandas.compat.numpy import np_percentile_argname
3433

3534
from pandas.core.dtypes.common import (
3635
is_any_int_dtype,
@@ -1661,95 +1660,6 @@ def f(x, y):
16611660
nanne = make_nancomp(operator.ne)
16621661

16631662

1664-
def _nanpercentile_1d(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    q: np.ndarray,
    na_value: Scalar,
    interpolation,
) -> Scalar | np.ndarray:
    """
    One-dimensional np.percentile that leaves out masked positions.

    Parameters
    ----------
    values : np.ndarray
        1-dimensional array over which to find percentiles.
    mask : np.ndarray[bool]
        True where the corresponding entry of ``values`` is missing.
    q : np.ndarray[float64]
        Percentiles to compute; handed to np.percentile as-is.
    na_value : scalar
        Fill value for the result when every entry is masked out (or
        ``values`` is empty).
    interpolation : str
        Forwarded to np.percentile under the keyword named by
        ``np_percentile_argname``.

    Returns
    -------
    quantiles : scalar or array
    """
    kept = values[~mask]
    n_kept = len(kept)

    if n_kept == 0:
        # No valid observations: repeat na_value once per percentile.
        placeholder = [na_value] * len(q)
        return np.array(placeholder, dtype=kept.dtype)

    extra = {np_percentile_argname: interpolation}
    return np.percentile(kept, q, **extra)
1696-
1697-
1698-
def nanpercentile(
    values: np.ndarray,
    q: np.ndarray,
    *,
    na_value,
    mask: npt.NDArray[np.bool_],
    interpolation,
):
    """
    Wrapper for np.percentile that skips missing values.

    Parameters
    ----------
    values : np.ndarray[ndim=2] over which to find quantiles
    q : np.ndarray[float64] of quantile indices to find
        Passed to np.percentile unchanged.
    na_value : scalar
        value to return for empty or all-null values
    mask : ndarray[bool]
        locations in values that should be considered missing
    interpolation : str
        Forwarded to np.percentile under the keyword named by
        ``np_percentile_argname``.

    Returns
    -------
    quantiles : scalar or array
    """
    if values.dtype.kind in "mM":
        # need to cast to integer to avoid rounding errors in numpy
        result = nanpercentile(
            values.view("i8"),
            q=q,
            na_value=na_value.view("i8"),
            mask=mask,
            interpolation=interpolation,
        )

        # Note: we have to do `astype` and not view because in general we
        #  have float result at this point, not i8
        return result.astype(values.dtype)

    if not lib.is_scalar(mask) and mask.any():
        # Caller is responsible for ensuring mask shape match
        assert mask.shape == values.shape
        # Each row may keep a different number of valid entries, so rows are
        # reduced one at a time; zip over the 2D arrays walks their rows.
        row_results = [
            _nanpercentile_1d(val, m, q, na_value, interpolation=interpolation)
            for (val, m) in zip(values, mask)
        ]
        # np.asarray instead of np.array(..., copy=False): a list input can
        # never be converted without copying, and NumPy >= 2.0 raises
        # ValueError when copy=False cannot be satisfied.
        return np.asarray(row_results, dtype=values.dtype).T

    # Nothing is masked: compute all rows in one vectorised call.
    return np.percentile(
        values, q, axis=1, **{np_percentile_argname: interpolation}
    )
1751-
1752-
17531663
def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
17541664
"""
17551665
Cumulative function with skipna support.

0 commit comments

Comments
 (0)