
CLN: Centralised _check_percentile #27584

Merged · 21 commits · merged on Oct 3, 2019
Changes from 13 commits
48 changes: 22 additions & 26 deletions pandas/core/algorithms.py
@@ -3,7 +3,7 @@
intended for public consumption
"""
from textwrap import dedent
from typing import Dict
from typing import Dict, Iterable, Union
from warnings import catch_warnings, simplefilter, warn

import numpy as np
@@ -114,10 +114,10 @@ def _ensure_data(values, dtype=None):

# datetimelike
if (
needs_i8_conversion(values)
or is_period_dtype(dtype)
or is_datetime64_any_dtype(dtype)
or is_timedelta64_dtype(dtype)
needs_i8_conversion(values)
Contributor: Why these changes?

Contributor Author: Sorry for these

or is_period_dtype(dtype)
or is_datetime64_any_dtype(dtype)
or is_timedelta64_dtype(dtype)
):
if is_period_dtype(values) or is_period_dtype(dtype):
from pandas import PeriodIndex
@@ -146,7 +146,7 @@ def _ensure_data(values, dtype=None):
return values.asi8, dtype, "int64"

elif is_categorical_dtype(values) and (
is_categorical_dtype(dtype) or dtype is None
is_categorical_dtype(dtype) or dtype is None
):
values = getattr(values, "values", values)
values = values.codes
@@ -248,7 +248,6 @@ def _get_hashtable_algo(values):


def _get_data_algo(values, func_map):

if is_categorical_dtype(values):
values = values._values_for_rank()

@@ -299,7 +298,6 @@ def match(to_match, values, na_sentinel=-1):
result = table.lookup(to_match)

if na_sentinel != -1:

# replace but return a numpy array
# use a Series because it handles dtype conversions properly
from pandas import Series
@@ -657,9 +655,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
values, dtype, _ = _ensure_data(values)

if (
is_datetime64_any_dtype(original)
or is_timedelta64_dtype(original)
or is_period_dtype(original)
is_datetime64_any_dtype(original)
or is_timedelta64_dtype(original)
or is_period_dtype(original)
):
na_value = na_value_for_dtype(original.dtype)
else:
@@ -690,7 +688,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):


def value_counts(
values, sort=True, ascending=False, normalize=False, bins=None, dropna=True
values, sort=True, ascending=False, normalize=False, bins=None, dropna=True
):
"""
Compute a histogram of the counts of non-null values.
@@ -993,10 +991,10 @@ def checked_add_with_arr(arr, b, arr_mask=None, b_mask=None):
to_raise = ((np.iinfo(np.int64).max - b2 < arr) & not_nan).any()
else:
to_raise = (
((np.iinfo(np.int64).max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any()
or (
(np.iinfo(np.int64).min - b2[mask2] > arr[mask2]) & not_nan[mask2]
).any()
((np.iinfo(np.int64).max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any()
or (
(np.iinfo(np.int64).min - b2[mask2] > arr[mask2]) & not_nan[mask2]
).any()
)

if to_raise:
@@ -1129,8 +1127,8 @@ def is_valid_dtype_n_method(dtype):
nsmallest/nlargest methods
"""
return (
is_numeric_dtype(dtype) and not is_complex_dtype(dtype)
) or needs_i8_conversion(dtype)
is_numeric_dtype(dtype) and not is_complex_dtype(dtype)
) or needs_i8_conversion(dtype)


class SelectNSeries(SelectN):
@@ -1165,7 +1163,6 @@ def compute(self, method):

# slow method
if n >= len(self.obj):

Contributor: can you revert these files as they are not actually changed

Contributor Author (@hedonhermdev, Oct 1, 2019): isort fails if I don't commit them

Contributor: ok that's odd

reverse_it = self.keep == "last" or method == "nlargest"
ascending = method == "nsmallest"
slc = np.s_[::-1] if reverse_it else np.s_[:]
@@ -1603,7 +1600,7 @@ def take(arr, indices, axis=0, allow_fill=False, fill_value=None):


def take_nd(
arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, allow_fill=True
arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, allow_fill=True
):
"""
Specialized Cython take which sets NaN values in one pass
@@ -1720,7 +1717,7 @@ def take_nd(


def take_2d_multi(
arr, indexer, out=None, fill_value=np.nan, mask_info=None, allow_fill=True
arr, indexer, out=None, fill_value=np.nan, mask_info=None, allow_fill=True
):
"""
Specialized Cython take which sets NaN values in one pass
@@ -1779,7 +1776,6 @@ def take_2d_multi(
if func is not None:
func = _convert_wrapper(func, out.dtype)
if func is None:

def func(arr, indexer, out, fill_value=np.nan):
_take_2d_multi_object(
arr, indexer, out, fill_value=fill_value, mask_info=mask_info
@@ -1842,9 +1838,9 @@ def searchsorted(arr, value, side="left", sorter=None):
sorter = ensure_platform_int(sorter)

if (
isinstance(arr, np.ndarray)
and is_integer_dtype(arr)
and (is_integer(value) or is_integer_dtype(value))
isinstance(arr, np.ndarray)
and is_integer_dtype(arr)
and (is_integer(value) or is_integer_dtype(value))
):
# if `arr` and `value` have different dtypes, `arr` would be
# recast by numpy, causing a slow search.
@@ -1864,7 +1860,7 @@ def searchsorted(arr, value, side="left", sorter=None):
else:
value = array(value, dtype=dtype)
elif not (
is_object_dtype(arr) or is_numeric_dtype(arr) or is_categorical_dtype(arr)
is_object_dtype(arr) or is_numeric_dtype(arr) or is_categorical_dtype(arr)
Contributor: can you not add unrelated changes (all of this whitespace)

Contributor Author: Sorry for that.

):
from pandas.core.series import Series

4 changes: 2 additions & 2 deletions pandas/core/frame.py
@@ -32,7 +32,7 @@
deprecate_kwarg,
rewrite_axis_style_signature,
)
from pandas.util._validators import validate_axis_style_args, validate_bool_kwarg
from pandas.util._validators import validate_axis_style_args, validate_bool_kwarg, validate_percentile

from pandas.core.dtypes.cast import (
cast_scalar_to_array,
@@ -8225,7 +8225,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
C 1 days 12:00:00
Name: 0.5, dtype: object
"""
self._check_percentile(q)
validate_percentile(q)

data = self._get_numeric_data() if numeric_only else self
axis = self._get_axis_number(axis)
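With this change, DataFrame.quantile delegates validation of q to the shared validate_percentile helper. A minimal sketch of the behaviour this preserves (example data assumed for illustration):

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3, 4]})
df.quantile(0.25)   # q is validated first, then the quantile is computed as before
df.quantile(25)     # ValueError: percentiles should all be in the interval [0, 1]. Try 0.25 instead.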
19 changes: 2 additions & 17 deletions pandas/core/generic.py
@@ -31,7 +31,7 @@
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature
from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs, validate_percentile

from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
from pandas.core.dtypes.common import (
@@ -10169,7 +10169,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
percentiles = list(percentiles)

# get them all to be in [0, 1]
self._check_percentile(percentiles)
validate_percentile(percentiles)

# median should always be included
if 0.5 not in percentiles:
@@ -10273,21 +10273,6 @@ def describe_1d(data):
d.columns = data.columns.copy()
return d

def _check_percentile(self, q):
"""
Validate percentiles (used by describe and quantile).
"""

msg = "percentiles should all be in the interval [0, 1]. Try {0} instead."
q = np.asarray(q)
if q.ndim == 0:
if not 0 <= q <= 1:
raise ValueError(msg.format(q / 100.0))
else:
if not all(0 <= qs <= 1 for qs in q):
raise ValueError(msg.format(q / 100.0))
return q

_shared_docs[
"pct_change"
] = """
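describe now routes its percentiles through the same validator before adding the median if it is missing. A short sketch, with example data assumed:

import pandas as pd

s = pd.Series([1, 2, 3, 4, 5])
s.describe(percentiles=[0.05, 0.95])   # valid; 0.5 (the median) is included automatically
s.describe(percentiles=[5, 95])        # ValueError: percentiles should all be in the interval [0, 1]. Try [0.05 0.95] instead.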
4 changes: 2 additions & 2 deletions pandas/core/series.py
@@ -16,7 +16,7 @@
from pandas.compat import PY36
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution, deprecate
from pandas.util._validators import validate_bool_kwarg
from pandas.util._validators import validate_bool_kwarg, validate_percentile

from pandas.core.dtypes.common import (
_is_unorderable_exception,
@@ -2353,7 +2353,7 @@ def quantile(self, q=0.5, interpolation="linear"):
dtype: float64
"""

self._check_percentile(q)
validate_percentile(q)

# We dispatch to DataFrame so that core.internals only has to worry
# about 2D cases.
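Series.quantile validates q with the same helper and then dispatches to the DataFrame implementation, as noted in the comment above. For example (values assumed):

import pandas as pd

s = pd.Series([10, 20, 30])
s.quantile(0.5)        # 20.0 — validated, then computed via a 1-column DataFrame
s.quantile([10, 90])   # ValueError: percentiles should all be in the interval [0, 1]. Try [0.1 0.9] instead.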
34 changes: 34 additions & 0 deletions pandas/util/_validators.py
@@ -3,6 +3,8 @@
for validating data or function arguments
"""
import warnings
import numpy as np
from typing import Union, Iterable

from pandas.core.dtypes.common import is_bool

@@ -370,3 +372,35 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True):
raise ValueError("Cannot specify both 'value' and 'method'.")

return value, method


def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray:
"""
Validate percentiles (used by describe and quantile).

This function checks whether the given float or iterable of floats is a valid percentile;
otherwise, a ValueError is raised.

Parameters
----------
q : float or iterable of floats
A single percentile or an iterable of percentiles.

Returns
-------
ndarray
An ndarray of the percentiles if valid.

Raises
------
ValueError if percentiles are not in the interval [0, 1].
"""
msg = "percentiles should all be in the interval [0, 1]. " "Try {0} instead."
Member: Extra quotation marks before 'Try'

q_arr = np.asarray(q)
if q_arr.ndim == 0:
if not 0 <= q_arr <= 1:
raise ValueError(msg.format(q_arr / 100.0))
else:
if not all(0 <= qs <= 1 for qs in q_arr):
raise ValueError(msg.format(q_arr / 100.0))
return q_arr
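
For reference, a brief usage sketch of the new helper as defined above (expected results shown in comments):

from pandas.util._validators import validate_percentile

validate_percentile(0.5)            # returns array(0.5), a 0-d ndarray
validate_percentile([0.25, 0.75])   # returns array([0.25, 0.75])
validate_percentile(50)             # ValueError: percentiles should all be in the interval [0, 1]. Try 0.5 instead.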