Skip to content

Commit a1412a0

Browse files
DOC: Validator + converting array_like to array-like in docstrings (#41295)
* Converting array_like to array-like in docstrings & comments
* Add docstring validation check for array_like vs. array-like
* Add unit test for arraylike validator, and fix example in core/generic.py
* make method of PandasDocstring

Co-authored-by: Marco Gorelli <[email protected]>
1 parent 9d2f1bf commit a1412a0

File tree

12 files changed

+53
-35
lines changed

12 files changed

+53
-35
lines changed

pandas/core/algorithms.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1530,13 +1530,13 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray:
15301530
Input array. If `sorter` is None, then it must be sorted in
15311531
ascending order, otherwise `sorter` must be an array of indices
15321532
that sort it.
1533-
value : array_like
1533+
value : array-like
15341534
Values to insert into `arr`.
15351535
side : {'left', 'right'}, optional
15361536
If 'left', the index of the first suitable location found is given.
15371537
If 'right', return the last such index. If there is no suitable
15381538
index, return either 0 or N (where N is the length of `arr`).
1539-
sorter : 1-D array_like, optional
1539+
sorter : 1-D array-like, optional
15401540
Optional array of integer indices that sort `arr` into ascending
15411541
order. They are typically the result of argsort.
15421542

pandas/core/array_algos/replace.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -45,21 +45,21 @@ def compare_or_regex_search(
4545
a: ArrayLike, b: Scalar | Pattern, regex: bool, mask: np.ndarray
4646
) -> ArrayLike | bool:
4747
"""
48-
Compare two array_like inputs of the same shape or two scalar values
48+
Compare two array-like inputs of the same shape or two scalar values
4949
5050
Calls operator.eq or re.search, depending on regex argument. If regex is
5151
True, perform an element-wise regex matching.
5252
5353
Parameters
5454
----------
55-
a : array_like
55+
a : array-like
5656
b : scalar or regex pattern
5757
regex : bool
5858
mask : np.ndarray[bool]
5959
6060
Returns
6161
-------
62-
mask : array_like of bool
62+
mask : array-like of bool
6363
"""
6464
if isna(b):
6565
return ~mask

pandas/core/arrays/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -826,13 +826,13 @@ def searchsorted(self, value, side="left", sorter=None):
826826
827827
Parameters
828828
----------
829-
value : array_like
829+
value : array-like
830830
Values to insert into `self`.
831831
side : {'left', 'right'}, optional
832832
If 'left', the index of the first suitable location found is given.
833833
If 'right', return the last such index. If there is no suitable
834834
index, return either 0 or N (where N is the length of `self`).
835-
sorter : 1-D array_like, optional
835+
sorter : 1-D array-like, optional
836836
Optional array of integer indices that sort `self` into ascending
837837
order. They are typically the result of argsort.
838838

pandas/core/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1133,13 +1133,13 @@ def factorize(self, sort: bool = False, na_sentinel: int | None = -1):
11331133
11341134
Parameters
11351135
----------
1136-
value : array_like
1136+
value : array-like
11371137
Values to insert into `self`.
11381138
side : {{'left', 'right'}}, optional
11391139
If 'left', the index of the first suitable location found is given.
11401140
If 'right', return the last such index. If there is no suitable
11411141
index, return either 0 or N (where N is the length of `self`).
1142-
sorter : 1-D array_like, optional
1142+
sorter : 1-D array-like, optional
11431143
Optional array of integer indices that sort `self` into ascending
11441144
order. They are typically the result of ``np.argsort``.
11451145

pandas/core/generic.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -7285,11 +7285,11 @@ def clip(
72857285
72867286
Parameters
72877287
----------
7288-
lower : float or array_like, default None
7288+
lower : float or array-like, default None
72897289
Minimum threshold value. All values below this
72907290
threshold will be set to it. A missing
72917291
threshold (e.g. `NA`) will not clip the value.
7292-
upper : float or array_like, default None
7292+
upper : float or array-like, default None
72937293
Maximum threshold value. All values above this
72947294
threshold will be set to it. A missing
72957295
threshold (e.g. `NA`) will not clip the value.
@@ -7889,8 +7889,8 @@ def resample(
78897889
78907890
Pass a custom function via ``apply``
78917891
7892-
>>> def custom_resampler(array_like):
7893-
... return np.sum(array_like) + 5
7892+
>>> def custom_resampler(arraylike):
7893+
... return np.sum(arraylike) + 5
78947894
...
78957895
>>> series.resample('3T').apply(custom_resampler)
78967896
2000-01-01 00:00:00 8

pandas/core/indexes/base.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -6025,8 +6025,8 @@ def any(self, *args, **kwargs):
60256025
60266026
Returns
60276027
-------
6028-
any : bool or array_like (if axis is specified)
6029-
A single element array_like may be converted to bool.
6028+
any : bool or array-like (if axis is specified)
6029+
A single element array-like may be converted to bool.
60306030
60316031
See Also
60326032
--------
@@ -6069,8 +6069,8 @@ def all(self, *args, **kwargs):
60696069
60706070
Returns
60716071
-------
6072-
all : bool or array_like (if axis is specified)
6073-
A single element array_like may be converted to bool.
6072+
all : bool or array-like (if axis is specified)
6073+
A single element array-like may be converted to bool.
60746074
60756075
See Also
60766076
--------

pandas/core/indexes/multi.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3877,7 +3877,7 @@ def maybe_droplevels(index: Index, key) -> Index:
38773877

38783878
def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray:
38793879
"""
3880-
Coerce the array_like indexer to the smallest integer dtype that can encode all
3880+
Coerce the array-like indexer to the smallest integer dtype that can encode all
38813881
of the given categories.
38823882
38833883
Parameters

pandas/core/internals/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1276,7 +1276,7 @@ def _unstack(self, unstacker, fill_value, new_placement):
12761276
-------
12771277
blocks : list of Block
12781278
New blocks of unstacked values.
1279-
mask : array_like of bool
1279+
mask : array-like of bool
12801280
The mask of columns of `blocks` we should keep.
12811281
"""
12821282
new_values, mask = unstacker.get_new_values(

pandas/core/missing.py

+13-13
Original file line numberDiff line numberDiff line change
@@ -524,11 +524,11 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
524524
525525
Parameters
526526
----------
527-
xi : array_like
527+
xi : array-like
528528
sorted 1D array of x-coordinates
529-
yi : array_like or list of array-likes
529+
yi : array-like or list of array-likes
530530
yi[i][j] is the j-th derivative known at xi[i]
531-
order: None or int or array_like of ints. Default: None.
531+
order : None or int or array-like of ints. Default: None.
532532
Specifies the degree of local polynomials. If not None, some
533533
derivatives are ignored.
534534
der : int or list
@@ -546,7 +546,7 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
546546
547547
Returns
548548
-------
549-
y : scalar or array_like
549+
y : scalar or array-like
550550
The result, of length R or length M or M by R.
551551
"""
552552
from scipy import interpolate
@@ -568,13 +568,13 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
568568
569569
Parameters
570570
----------
571-
xi : array_like
571+
xi : array-like
572572
A sorted list of x-coordinates, of length N.
573-
yi : array_like
573+
yi : array-like
574574
A 1-D array of real values. `yi`'s length along the interpolation
575575
axis must be equal to the length of `xi`. If N-D array, use axis
576576
parameter to select correct axis.
577-
x : scalar or array_like
577+
x : scalar or array-like
578578
Of length M.
579579
der : int, optional
580580
How many derivatives to extract; None for all potentially
@@ -590,7 +590,7 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
590590
591591
Returns
592592
-------
593-
y : scalar or array_like
593+
y : scalar or array-like
594594
The result, of length R or length M or M by R.
595595
596596
"""
@@ -609,14 +609,14 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat
609609
610610
Parameters
611611
----------
612-
xi : array_like, shape (n,)
612+
xi : array-like, shape (n,)
613613
1-d array containing values of the independent variable.
614614
Values must be real, finite and in strictly increasing order.
615-
yi : array_like
615+
yi : array-like
616616
Array containing values of the dependent variable. It can have
617617
arbitrary number of dimensions, but the length along ``axis``
618618
(see below) must match the length of ``xi``. Values must be finite.
619-
x : scalar or array_like, shape (m,)
619+
x : scalar or array-like, shape (m,)
620620
axis : int, optional
621621
Axis along which `y` is assumed to be varying. Meaning that for
622622
``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
@@ -644,7 +644,7 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat
644644
tuple `(order, deriv_values)` allowing to specify arbitrary
645645
derivatives at curve ends:
646646
* `order`: the derivative order, 1 or 2.
647-
* `deriv_value`: array_like containing derivative values, shape must
647+
* `deriv_value`: array-like containing derivative values, shape must
648648
be the same as `y`, excluding ``axis`` dimension. For example, if
649649
`y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with
650650
the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D
@@ -661,7 +661,7 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat
661661
662662
Returns
663663
-------
664-
y : scalar or array_like
664+
y : scalar or array-like
665665
The result, of shape (m,)
666666
667667
References

pandas/plotting/_core.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ def hist_frame(
430430
y : label or position, optional
431431
Allows plotting of one column versus another. If not specified,
432432
all numerical columns are used.
433-
color : str, array_like, or dict, optional
433+
color : str, array-like, or dict, optional
434434
The color for each of the DataFrame's columns. Possible values are:
435435
436436
- A single color string referred to by name, RGB or RGBA code,
@@ -1571,7 +1571,7 @@ def scatter(self, x, y, s=None, c=None, **kwargs):
15711571
y : int or str
15721572
The column name or column position to be used as vertical
15731573
coordinates for each point.
1574-
s : str, scalar or array_like, optional
1574+
s : str, scalar or array-like, optional
15751575
The size of each point. Possible values are:
15761576
15771577
- A string with the name of the column to be used for marker's size.
@@ -1584,7 +1584,7 @@ def scatter(self, x, y, s=None, c=None, **kwargs):
15841584
15851585
.. versionchanged:: 1.1.0
15861586
1587-
c : str, int or array_like, optional
1587+
c : str, int or array-like, optional
15881588
The color of each point. Possible values are:
15891589
15901590
- A single color string referred to by name, RGB or RGBA code,

scripts/tests/test_validate_docstrings.py

+11
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ def missing_whitespace_after_comma(self):
8282
"""
8383
pass
8484

85+
def write_array_like_with_hyphen_not_underscore(self):
86+
"""
87+
In docstrings, use array-like over array_like
88+
"""
89+
pass
90+
8591

8692
class TestValidator:
8793
def _import_path(self, klass=None, func=None):
@@ -172,6 +178,11 @@ def test_bad_class(self, capsys):
172178
"missing_whitespace_after_comma",
173179
("flake8 error: E231 missing whitespace after ',' (3 times)",),
174180
),
181+
(
182+
"BadDocstrings",
183+
"write_array_like_with_hyphen_not_underscore",
184+
("Use 'array-like' rather than 'array_like' in docstrings",),
185+
),
175186
],
176187
)
177188
def test_bad_docstrings(self, capsys, klass, func, msgs):

scripts/validate_docstrings.py

+7
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
ERROR_MSGS = {
5555
"GL04": "Private classes ({mentioned_private_classes}) should not be "
5656
"mentioned in public docstrings",
57+
"GL05": "Use 'array-like' rather than 'array_like' in docstrings.",
5758
"SA05": "{reference_name} in `See Also` section does not need `pandas` "
5859
"prefix, use {right_reference} instead.",
5960
"EX02": "Examples do not pass tests:\n{doctest_log}",
@@ -196,6 +197,9 @@ def validate_pep8(self):
196197
error_count, error_code, message = error_message.split(maxsplit=2)
197198
yield error_code, message, int(error_count)
198199

200+
def non_hyphenated_array_like(self):
201+
return "array_like" in self.raw_doc
202+
199203

200204
def pandas_validate(func_name: str):
201205
"""
@@ -256,6 +260,9 @@ def pandas_validate(func_name: str):
256260
pandas_error("EX04", imported_library=wrong_import)
257261
)
258262

263+
if doc.non_hyphenated_array_like():
264+
result["errors"].append(pandas_error("GL05"))
265+
259266
return result
260267

261268

0 commit comments

Comments
 (0)