forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path_validators.py
450 lines (362 loc) · 13.9 KB
/
_validators.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
"""
Module that contains many useful utilities
for validating data or function arguments
"""
from __future__ import annotations
from typing import (
Iterable,
Sequence,
TypeVar,
overload,
)
import numpy as np
from pandas._libs import lib
from pandas.core.dtypes.common import (
is_bool,
is_integer,
)
BoolishT = TypeVar("BoolishT", bool, int)
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
    """
    Ensure no more values were supplied in `args` than `compat_args` has keys.

    When `args` is longer than `compat_args`, a TypeError is raised whose
    wording mirrors the one Python emits for a call with too many arguments.
    `max_fname_arg_count` is added to both the permitted and the received
    counts shown in that message; it must be non-negative.
    """
    if max_fname_arg_count < 0:
        raise ValueError("'max_fname_arg_count' must be non-negative")

    n_given = len(args)
    n_allowed = len(compat_args)
    if n_given <= n_allowed:
        # Nothing to complain about.
        return

    max_arg_count = n_allowed + max_fname_arg_count
    actual_arg_count = n_given + max_fname_arg_count
    # Singular noun only when exactly one argument is permitted.
    argument = "arguments" if max_arg_count != 1 else "argument"

    raise TypeError(
        f"{fname}() takes at most {max_arg_count} {argument} "
        f"({actual_arg_count} given)"
    )
def _check_for_default_values(fname, arg_val_dict, compat_args):
    """
    Verify every entry of `arg_val_dict` equals its default in `compat_args`.

    Callers are expected to have already established that the keys of
    `arg_val_dict` are a subset of those of `compat_args`.

    Raises
    ------
    ValueError
        If any supplied value differs from the corresponding default.
    """
    for key, supplied in arg_val_dict.items():
        default = compat_args[key]

        # First attempt a plain equality comparison, since the left-hand
        # object may define its own notion of equality.
        try:
            # Settle the "exactly one of them is None" case up front so that
            # we never compare e.g. a numpy array against None.
            if (supplied is None) != (default is None):
                match = False
            else:
                match = supplied == default

            if not is_bool(match):
                raise ValueError("'match' is not a boolean")

        # Equality was unusable (it raised, or did not yield a boolean):
        # fall back to an identity check.
        except ValueError:
            match = supplied is default

        if not match:
            raise ValueError(
                f"the '{key}' parameter is not supported in "
                f"the pandas implementation of {fname}()"
            )
def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Validate the `*args` forwarded to a function against `compat_args`.

    Two things are checked: that `args` holds no more than
    `len(compat_args)` values, and that each of those values equals the
    default recorded for the parameter at the same position.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `*args` parameter
    args : tuple
        The `*args` parameter passed into a function
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys and their associated default values.
        Its key order is what maps positional values to parameter names;
        this accommodates buggy behaviour in some versions of `numpy`,
        where a signature displayed keyword arguments but then passed those
        arguments **positionally** internally when calling downstream
        implementations.

    Raises
    ------
    TypeError
        If `args` contains more values than there are `compat_args`
    ValueError
        If `args` contains values that do not correspond to those
        of the default values specified in `compat_args`
    """
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)

    # Pair each positional value with the parameter name at the same
    # position, so that a failure can name the unsupported parameter.
    named_values = {name: value for name, value in zip(compat_args, args)}
    _check_for_default_values(fname, named_values, compat_args)
def _check_for_invalid_keys(fname, kwargs, compat_args):
    """
    Reject any key of `kwargs` that is not also a key of `compat_args`.

    Raises
    ------
    TypeError
        Naming one offending key, if at least one such key exists.
    """
    # Building a set from a dict yields the set of its keys.
    unexpected = set(kwargs) - set(compat_args)
    if not unexpected:
        return

    bad_arg = next(iter(unexpected))
    raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
def validate_kwargs(fname, kwargs, compat_args) -> None:
    """
    Validate the `**kwargs` forwarded to `fname` against `compat_args`.

    Every key must be one that `compat_args` knows about, and every value
    must equal the default recorded for that key.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `**kwargs` parameter
    kwargs : dict
        The `**kwargs` parameter passed into `fname`
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to have and their
        associated default values

    Raises
    ------
    TypeError if `kwargs` contains keys not in `compat_args`
    ValueError if `kwargs` contains keys in `compat_args` that do not
    map to the default values specified in `compat_args`
    """
    # The default-value check runs on a shallow copy taken before the key
    # check.
    snapshot = kwargs.copy()
    _check_for_invalid_keys(fname, kwargs, compat_args)
    _check_for_default_values(fname, snapshot, compat_args)
def validate_args_and_kwargs(
    fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `*compat_args`
    and whether or not they are set to their default values.

    The caller's `kwargs` dictionary is left unmodified.

    Parameters
    ----------
    fname: str
        The name of the function being passed the `*args` and `**kwargs`
        parameters
    args: tuple
        The `*args` parameter passed into a function
    kwargs: dict
        The `**kwargs` parameter passed into `fname`
    max_fname_arg_count: int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values. Its key order is used
        to map the positional values in `args` to parameter names.

    Raises
    ------
    TypeError
        If `args` and `kwargs` together contain more values than there are
        `compat_args`, if a parameter is supplied both positionally and by
        keyword, or if `kwargs` contains keys not in `compat_args`
    ValueError
        If any value supplied through `args` or `kwargs` does not equal the
        default value specified for it in `compat_args`

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.
    """
    # Check that the total number of arguments passed in (i.e.
    # args and kwargs) does not exceed the length of compat_args
    _check_arg_length(
        fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
    )

    # Check there is no overlap with the positional and keyword
    # arguments, similar to what is done in actual Python functions
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Validate a merged copy rather than updating `kwargs` in place, so the
    # caller's dictionary does not silently gain the positional entries.
    combined = {**kwargs, **args_dict}
    validate_kwargs(fname, combined, compat_args)
def validate_bool_kwarg(
    value: BoolishNoneT, arg_name, none_allowed: bool = True, int_allowed: bool = False
) -> BoolishNoneT:
    """
    Check that `value` is acceptable as a boolean argument and hand it back.

    Parameters
    ----------
    value : bool
        Value to be validated.
    arg_name : str
        Name of the argument. To be reflected in the error message.
    none_allowed : bool, default True
        Whether to consider None to be a valid boolean.
    int_allowed : bool, default False
        Whether to consider integer value to be a valid boolean.

    Returns
    -------
    value
        The same value as input.

    Raises
    ------
    ValueError
        If the value is not a valid boolean.
    """
    acceptable = (
        is_bool(value)
        or (none_allowed and value is None)
        or (int_allowed and isinstance(value, int))
    )
    if acceptable:
        return value  # pyright: ignore[reportGeneralTypeIssues]

    raise ValueError(
        f'For argument "{arg_name}" expected type bool, received '
        f"type {type(value).__name__}."
    )
def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
    """
    Validate the 'value' and 'method' keyword arguments given to 'fillna'.

    Exactly one of the two must be supplied. A supplied 'method' is passed
    through ``clean_fill_method`` and the result is returned in its place.

    Parameters
    ----------
    value, method : object
        The 'value' and 'method' keyword arguments for 'fillna'.
    validate_scalar_dict_value : bool, default True
        Whether to validate that 'value' is a scalar or dict. Specifically,
        validate that it is not a list or tuple.

    Returns
    -------
    value, method : object
    """
    from pandas.core.missing import clean_fill_method

    if method is None:
        # Only 'value' can have been given on this path.
        if value is None:
            raise ValueError("Must specify a fill 'value' or 'method'.")
        if validate_scalar_dict_value and isinstance(value, (list, tuple)):
            raise TypeError(
                '"value" parameter must be a scalar or dict, but '
                f'you passed a "{type(value).__name__}"'
            )
    elif value is None:
        # Only 'method' was given.
        method = clean_fill_method(method)
    else:
        raise ValueError("Cannot specify both 'value' and 'method'.")

    return value, method
def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    The input is converted with ``np.asarray`` and every entry is required
    to lie in the closed interval [0, 1].

    Parameters
    ----------
    q: float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError if percentiles are not in given interval([0, 1]).
    """
    q_arr = np.asarray(q)

    if q_arr.ndim == 0:
        # A lone percentile: compare the zero-dimensional array directly.
        in_range = bool(0 <= q_arr <= 1)
    else:
        in_range = all(0 <= entry <= 1 for entry in q_arr)

    if not in_range:
        # Don't change this to an f-string. The string formatting
        # is too expensive for cases where we don't need it.
        msg = "percentiles should all be in the interval [0, 1]. Try {} instead."
        raise ValueError(msg.format(q_arr / 100.0))

    return q_arr
@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
    ...


@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
    ...


def validate_ascending(
    ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
    """
    Validate ``ascending`` kwargs for ``sort_index`` method.

    A single flag is validated and returned as is. A sequence is validated
    item by item and returned as a new list. None is rejected and integers
    are accepted.
    """

    def _validated(flag):
        # Same rules for a lone flag and for each member of a sequence.
        return validate_bool_kwarg(
            flag, "ascending", none_allowed=False, int_allowed=True
        )

    if isinstance(ascending, Sequence):
        return [_validated(item) for item in ascending]
    return _validated(ascending)
def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
    """
    Translate the `closed` argument into a pair of endpoint flags.

    Parameters
    ----------
    closed : {None, "left", "right"}
        None closes both endpoints; "left" or "right" closes only that one.

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    if closed is None:
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True

    raise ValueError("Closed has to be either 'left', 'right' or None")
def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
    """
    Translate the `inclusive` argument into a pair of endpoint flags.

    Parameters
    ----------
    inclusive : {"both", "neither", "left", "right"}

    Returns
    -------
    left_right_inclusive : tuple[bool, bool]
        Whether the left and the right endpoint, in that order, are included.

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    flags_by_name = {
        "both": (True, True),
        "left": (True, False),
        "right": (False, True),
        "neither": (False, False),
    }

    # Non-strings (None included) are rejected before any lookup is tried.
    if isinstance(inclusive, str) and inclusive in flags_by_name:
        return flags_by_name[inclusive]

    raise ValueError(
        "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
    )
def validate_insert_loc(loc: int, length: int) -> int:
    """
    Check that `loc` is an integer between -length and length, inclusive.

    A negative `loc` is shifted by `length`, so the returned position always
    lies within [0, length].

    The exceptions we raise on failure match np.insert.

    Raises
    ------
    TypeError
        If `loc` is not an integer.
    IndexError
        If `loc`, after shifting, falls outside [0, length].
    """
    if not is_integer(loc):
        raise TypeError(f"loc must be an integer between -{length} and {length}")

    # Negative positions count from the end.
    position = loc + length if loc < 0 else loc

    if position < 0 or position > length:
        raise IndexError(f"loc must be an integer between -{length} and {length}")
    return position  # pyright: ignore[reportGeneralTypeIssues]
def check_dtype_backend(dtype_backend) -> None:
    """
    Check that `dtype_backend` is a supported value.

    ``lib.no_default`` is accepted without further checks; otherwise the
    value must be "numpy_nullable" or "pyarrow".

    Raises
    ------
    ValueError
        If `dtype_backend` is anything else.
    """
    if dtype_backend is lib.no_default:
        return

    if dtype_backend not in ["numpy_nullable", "pyarrow"]:
        raise ValueError(
            f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
            f"'pyarrow' are allowed.",
        )