diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py index 261d19ade080f..64cd43a3e77cb 100644 --- a/pandas/core/array_algos/quantile.py +++ b/pandas/core/array_algos/quantile.py @@ -2,18 +2,19 @@ import numpy as np +from pandas._libs import lib from pandas._typing import ( ArrayLike, + Scalar, npt, ) +from pandas.compat.numpy import np_percentile_argname from pandas.core.dtypes.missing import ( isna, na_value_for_dtype, ) -from pandas.core.nanops import nanpercentile - def quantile_compat( values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str @@ -41,7 +42,7 @@ def quantile_compat( def quantile_with_mask( values: np.ndarray, - mask: np.ndarray, + mask: npt.NDArray[np.bool_], fill_value, qs: npt.NDArray[np.float64], interpolation: str, @@ -84,10 +85,9 @@ def quantile_with_mask( flat = np.array([fill_value] * len(qs)) result = np.repeat(flat, len(values)).reshape(len(values), len(qs)) else: - # asarray needed for Sparse, see GH#24600 - result = nanpercentile( + result = _nanpercentile( values, - np.array(qs) * 100, + qs * 100.0, na_value=fill_value, mask=mask, interpolation=interpolation, @@ -97,3 +97,92 @@ def quantile_with_mask( result = result.T return result + + +def _nanpercentile_1d( + values: np.ndarray, + mask: npt.NDArray[np.bool_], + qs: npt.NDArray[np.float64], + na_value: Scalar, + interpolation, +) -> Scalar | np.ndarray: + """ + Wrapper for np.percentile that skips missing values, specialized to + 1-dimensional case. + + Parameters + ---------- + values : array over which to find quantiles + mask : ndarray[bool] + locations in values that should be considered missing + qs : np.ndarray[float64] of quantile indices to find + na_value : scalar + value to return for empty or all-null values + interpolation : str + + Returns + ------- + quantiles : scalar or array + """ + # mask is Union[ExtensionArray, ndarray] + values = values[~mask] + + if len(values) == 0: + return np.array([na_value] * len(qs), dtype=values.dtype) + + return np.percentile(values, qs, **{np_percentile_argname: interpolation}) + + +def _nanpercentile( + values: np.ndarray, + qs: npt.NDArray[np.float64], + *, + na_value, + mask: npt.NDArray[np.bool_], + interpolation, +): + """ + Wrapper for np.percentile that skips missing values. + + Parameters + ---------- + values : np.ndarray[ndim=2] over which to find quantiles + qs : np.ndarray[float64] of quantile indices to find + na_value : scalar + value to return for empty or all-null values + mask : np.ndarray[bool] + locations in values that should be considered missing + interpolation : str + + Returns + ------- + quantiles : scalar or array + """ + + if values.dtype.kind in ["m", "M"]: + # need to cast to integer to avoid rounding errors in numpy + result = _nanpercentile( + values.view("i8"), + qs=qs, + na_value=na_value.view("i8"), + mask=mask, + interpolation=interpolation, + ) + + # Note: we have to do `astype` and not view because in general we + # have float result at this point, not i8 + return result.astype(values.dtype) + + if not lib.is_scalar(mask) and mask.any(): + # Caller is responsible for ensuring mask shape match + assert mask.shape == values.shape + result = [ + _nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation) + for (val, m) in zip(list(values), list(mask)) + ] + result = np.array(result, dtype=values.dtype, copy=False).T + return result + else: + return np.percentile( + values, qs, axis=1, **{np_percentile_argname: interpolation} + ) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 3eb6d82ed0c54..40664f178993e 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -30,7 +30,6 @@ npt, ) from pandas.compat._optional import import_optional_dependency -from pandas.compat.numpy import np_percentile_argname from pandas.core.dtypes.common import ( is_any_int_dtype, @@ -1661,95 +1660,6 @@ def f(x, y): nanne = make_nancomp(operator.ne) -def _nanpercentile_1d( - values: np.ndarray, - mask: npt.NDArray[np.bool_], - q: np.ndarray, - na_value: Scalar, - interpolation, -) -> Scalar | np.ndarray: - """ - Wrapper for np.percentile that skips missing values, specialized to - 1-dimensional case. - - Parameters - ---------- - values : array over which to find quantiles - mask : ndarray[bool] - locations in values that should be considered missing - q : np.ndarray[float64] of quantile indices to find - na_value : scalar - value to return for empty or all-null values - interpolation : str - - Returns - ------- - quantiles : scalar or array - """ - # mask is Union[ExtensionArray, ndarray] - values = values[~mask] - - if len(values) == 0: - return np.array([na_value] * len(q), dtype=values.dtype) - - return np.percentile(values, q, **{np_percentile_argname: interpolation}) - - -def nanpercentile( - values: np.ndarray, - q: np.ndarray, - *, - na_value, - mask: npt.NDArray[np.bool_], - interpolation, -): - """ - Wrapper for np.percentile that skips missing values. - - Parameters - ---------- - values : np.ndarray[ndim=2] over which to find quantiles - q : np.ndarray[float64] of quantile indices to find - na_value : scalar - value to return for empty or all-null values - mask : ndarray[bool] - locations in values that should be considered missing - interpolation : str - - Returns - ------- - quantiles : scalar or array - """ - - if values.dtype.kind in ["m", "M"]: - # need to cast to integer to avoid rounding errors in numpy - result = nanpercentile( - values.view("i8"), - q=q, - na_value=na_value.view("i8"), - mask=mask, - interpolation=interpolation, - ) - - # Note: we have to do `astype` and not view because in general we - # have float result at this point, not i8 - return result.astype(values.dtype) - - if not lib.is_scalar(mask) and mask.any(): - # Caller is responsible for ensuring mask shape match - assert mask.shape == values.shape - result = [ - _nanpercentile_1d(val, m, q, na_value, interpolation=interpolation) - for (val, m) in zip(list(values), list(mask)) - ] - result = np.array(result, dtype=values.dtype, copy=False).T - return result - else: - return np.percentile( - values, q, axis=1, **{np_percentile_argname: interpolation} - ) - - def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: """ Cumulative function with skipna support.