Skip to content

Commit 5686e9a

Browse files
hedonhermdev authored and jreback committed
CLN: Centralised _check_percentile (#27584)
1 parent bf6c5c3 commit 5686e9a

File tree

5 files changed

+49
-24
lines changed

5 files changed

+49
-24
lines changed

pandas/core/algorithms.py

-3
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,6 @@ def _get_hashtable_algo(values):
246246

247247

248248
def _get_data_algo(values, func_map):
249-
250249
if is_categorical_dtype(values):
251250
values = values._values_for_rank()
252251

@@ -297,7 +296,6 @@ def match(to_match, values, na_sentinel=-1):
297296
result = table.lookup(to_match)
298297

299298
if na_sentinel != -1:
300-
301299
# replace but return a numpy array
302300
# use a Series because it handles dtype conversions properly
303301
from pandas import Series
@@ -1163,7 +1161,6 @@ def compute(self, method):
11631161

11641162
# slow method
11651163
if n >= len(self.obj):
1166-
11671164
reverse_it = self.keep == "last" or method == "nlargest"
11681165
ascending = method == "nsmallest"
11691166
slc = np.s_[::-1] if reverse_it else np.s_[:]

pandas/core/frame.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,11 @@
3131
deprecate_kwarg,
3232
rewrite_axis_style_signature,
3333
)
34-
from pandas.util._validators import validate_axis_style_args, validate_bool_kwarg
34+
from pandas.util._validators import (
35+
validate_axis_style_args,
36+
validate_bool_kwarg,
37+
validate_percentile,
38+
)
3539

3640
from pandas.core.dtypes.cast import (
3741
cast_scalar_to_array,
@@ -8178,7 +8182,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
81788182
C 1 days 12:00:00
81798183
Name: 0.5, dtype: object
81808184
"""
8181-
self._check_percentile(q)
8185+
validate_percentile(q)
81828186

81838187
data = self._get_numeric_data() if numeric_only else self
81848188
axis = self._get_axis_number(axis)

pandas/core/generic.py

+6-17
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@
3232
from pandas.compat.numpy import function as nv
3333
from pandas.errors import AbstractMethodError
3434
from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature
35-
from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
35+
from pandas.util._validators import (
36+
validate_bool_kwarg,
37+
validate_fillna_kwargs,
38+
validate_percentile,
39+
)
3640

3741
from pandas.core.dtypes.common import (
3842
ensure_int64,
@@ -10168,7 +10172,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
1016810172
percentiles = list(percentiles)
1016910173

1017010174
# get them all to be in [0, 1]
10171-
self._check_percentile(percentiles)
10175+
validate_percentile(percentiles)
1017210176

1017310177
# median should always be included
1017410178
if 0.5 not in percentiles:
@@ -10272,21 +10276,6 @@ def describe_1d(data):
1027210276
d.columns = data.columns.copy()
1027310277
return d
1027410278

10275-
def _check_percentile(self, q):
10276-
"""
10277-
Validate percentiles (used by describe and quantile).
10278-
"""
10279-
10280-
msg = "percentiles should all be in the interval [0, 1]. Try {0} instead."
10281-
q = np.asarray(q)
10282-
if q.ndim == 0:
10283-
if not 0 <= q <= 1:
10284-
raise ValueError(msg.format(q / 100.0))
10285-
else:
10286-
if not all(0 <= qs <= 1 for qs in q):
10287-
raise ValueError(msg.format(q / 100.0))
10288-
return q
10289-
1029010279
_shared_docs[
1029110280
"pct_change"
1029210281
] = """

pandas/core/series.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from pandas.compat import PY36
1717
from pandas.compat.numpy import function as nv
1818
from pandas.util._decorators import Appender, Substitution, deprecate
19-
from pandas.util._validators import validate_bool_kwarg
19+
from pandas.util._validators import validate_bool_kwarg, validate_percentile
2020

2121
from pandas.core.dtypes.common import (
2222
_is_unorderable_exception,
@@ -2317,7 +2317,7 @@ def quantile(self, q=0.5, interpolation="linear"):
23172317
dtype: float64
23182318
"""
23192319

2320-
self._check_percentile(q)
2320+
validate_percentile(q)
23212321

23222322
# We dispatch to DataFrame so that core.internals only has to worry
23232323
# about 2D cases.

pandas/util/_validators.py

+35
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22
Module that contains many useful utilities
33
for validating data or function arguments
44
"""
5+
from typing import Iterable, Union
56
import warnings
67

8+
import numpy as np
9+
710
from pandas.core.dtypes.common import is_bool
811

912

@@ -370,3 +373,35 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True):
370373
raise ValueError("Cannot specify both 'value' and 'method'.")
371374

372375
return value, method
376+
377+
378+
def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    This function checks if the given float or iterable of floats is a valid
    percentile, otherwise raises a ValueError.

    Parameters
    ----------
    q : float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError
        If any percentile is not in the interval [0, 1].
    """
    q_arr = np.asarray(q)

    # One vectorised bounds check covers both the scalar (0-d) and the
    # array case.  NaN fails both comparisons, so it is rejected as well.
    within_bounds = (q_arr >= 0) & (q_arr <= 1)
    if not within_bounds.all():
        # Build the message only on the failure path; it suggests the input
        # rescaled from a 0-100 range to 0-1.
        msg = "percentiles should all be in the interval [0, 1]. Try {0} instead."
        raise ValueError(msg.format(q_arr / 100.0))
    return q_arr

0 commit comments

Comments
 (0)