REF: nanpercentile -> array_algos.quantile (#44655)

jbrockmendel · web-flow · commit de4d74d40921 · 2021-12-23T11:07:57.000-05:00
diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py
@@ -2,18 +2,19 @@
 
 import numpy as np
 
+from pandas._libs import lib
 from pandas._typing import (
     ArrayLike,
+    Scalar,
     npt,
 )
+from pandas.compat.numpy import np_percentile_argname
 
 from pandas.core.dtypes.missing import (
     isna,
     na_value_for_dtype,
 )
 
-from pandas.core.nanops import nanpercentile
-
 
 def quantile_compat(
     values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
@@ -41,7 +42,7 @@ def quantile_compat(
 
 def quantile_with_mask(
     values: np.ndarray,
-    mask: np.ndarray,
+    mask: npt.NDArray[np.bool_],
     fill_value,
     qs: npt.NDArray[np.float64],
     interpolation: str,
@@ -84,10 +85,9 @@ def quantile_with_mask(
         flat = np.array([fill_value] * len(qs))
         result = np.repeat(flat, len(values)).reshape(len(values), len(qs))
     else:
-        # asarray needed for Sparse, see GH#24600
-        result = nanpercentile(
+        result = _nanpercentile(
             values,
-            np.array(qs) * 100,
+            qs * 100.0,
             na_value=fill_value,
             mask=mask,
             interpolation=interpolation,
@@ -97,3 +97,92 @@ def quantile_with_mask(
         result = result.T
 
     return result
+
+
+def _nanpercentile_1d(
+    values: np.ndarray,
+    mask: npt.NDArray[np.bool_],
+    qs: npt.NDArray[np.float64],
+    na_value: Scalar,
+    interpolation,
+) -> Scalar | np.ndarray:
+    """
+    Wrapper for np.percentile that skips missing values, specialized to
+    1-dimensional case.
+
+    Parameters
+    ----------
+    values : array over which to find quantiles
+    mask : ndarray[bool]
+        locations in values that should be considered missing
+    qs : np.ndarray[float64] of quantile indices to find
+    na_value : scalar
+        value to return for empty or all-null values
+    interpolation : str
+
+    Returns
+    -------
+    quantiles : scalar or array
+    """
+    # mask is Union[ExtensionArray, ndarray]
+    values = values[~mask]
+
+    if len(values) == 0:
+        return np.array([na_value] * len(qs), dtype=values.dtype)
+
+    return np.percentile(values, qs, **{np_percentile_argname: interpolation})
+
+
+def _nanpercentile(
+    values: np.ndarray,
+    qs: npt.NDArray[np.float64],
+    *,
+    na_value,
+    mask: npt.NDArray[np.bool_],
+    interpolation,
+):
+    """
+    Wrapper for np.percentile that skips missing values.
+
+    Parameters
+    ----------
+    values : np.ndarray[ndim=2]  over which to find quantiles
+    qs : np.ndarray[float64] of quantile indices to find
+    na_value : scalar
+        value to return for empty or all-null values
+    mask : np.ndarray[bool]
+        locations in values that should be considered missing
+    interpolation : str
+
+    Returns
+    -------
+    quantiles : scalar or array
+    """
+
+    if values.dtype.kind in ["m", "M"]:
+        # need to cast to integer to avoid rounding errors in numpy
+        result = _nanpercentile(
+            values.view("i8"),
+            qs=qs,
+            na_value=na_value.view("i8"),
+            mask=mask,
+            interpolation=interpolation,
+        )
+
+        # Note: we have to do `astype` and not view because in general we
+        #  have float result at this point, not i8
+        return result.astype(values.dtype)
+
+    if not lib.is_scalar(mask) and mask.any():
+        # Caller is responsible for ensuring mask shape match
+        assert mask.shape == values.shape
+        result = [
+            _nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation)
+            for (val, m) in zip(list(values), list(mask))
+        ]
+        result = np.array(result, dtype=values.dtype, copy=False).T
+        return result
+    else:
+        return np.percentile(
+            values, qs, axis=1, **{np_percentile_argname: interpolation}
+        )
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -30,7 +30,6 @@
     npt,
 )
 from pandas.compat._optional import import_optional_dependency
-from pandas.compat.numpy import np_percentile_argname
 
 from pandas.core.dtypes.common import (
     is_any_int_dtype,
@@ -1661,95 +1660,6 @@ def f(x, y):
 nanne = make_nancomp(operator.ne)
 
 
-def _nanpercentile_1d(
-    values: np.ndarray,
-    mask: npt.NDArray[np.bool_],
-    q: np.ndarray,
-    na_value: Scalar,
-    interpolation,
-) -> Scalar | np.ndarray:
-    """
-    Wrapper for np.percentile that skips missing values, specialized to
-    1-dimensional case.
-
-    Parameters
-    ----------
-    values : array over which to find quantiles
-    mask : ndarray[bool]
-        locations in values that should be considered missing
-    q : np.ndarray[float64] of quantile indices to find
-    na_value : scalar
-        value to return for empty or all-null values
-    interpolation : str
-
-    Returns
-    -------
-    quantiles : scalar or array
-    """
-    # mask is Union[ExtensionArray, ndarray]
-    values = values[~mask]
-
-    if len(values) == 0:
-        return np.array([na_value] * len(q), dtype=values.dtype)
-
-    return np.percentile(values, q, **{np_percentile_argname: interpolation})
-
-
-def nanpercentile(
-    values: np.ndarray,
-    q: np.ndarray,
-    *,
-    na_value,
-    mask: npt.NDArray[np.bool_],
-    interpolation,
-):
-    """
-    Wrapper for np.percentile that skips missing values.
-
-    Parameters
-    ----------
-    values : np.ndarray[ndim=2]  over which to find quantiles
-    q : np.ndarray[float64] of quantile indices to find
-    na_value : scalar
-        value to return for empty or all-null values
-    mask : ndarray[bool]
-        locations in values that should be considered missing
-    interpolation : str
-
-    Returns
-    -------
-    quantiles : scalar or array
-    """
-
-    if values.dtype.kind in ["m", "M"]:
-        # need to cast to integer to avoid rounding errors in numpy
-        result = nanpercentile(
-            values.view("i8"),
-            q=q,
-            na_value=na_value.view("i8"),
-            mask=mask,
-            interpolation=interpolation,
-        )
-
-        # Note: we have to do `astype` and not view because in general we
-        #  have float result at this point, not i8
-        return result.astype(values.dtype)
-
-    if not lib.is_scalar(mask) and mask.any():
-        # Caller is responsible for ensuring mask shape match
-        assert mask.shape == values.shape
-        result = [
-            _nanpercentile_1d(val, m, q, na_value, interpolation=interpolation)
-            for (val, m) in zip(list(values), list(mask))
-        ]
-        result = np.array(result, dtype=values.dtype, copy=False).T
-        return result
-    else:
-        return np.percentile(
-            values, q, axis=1, **{np_percentile_argname: interpolation}
-        )
-
-
 def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
     """
     Cumulative function with skipna support.