From e102d88b6c7eed4121afab3ff6b32c46f839123a Mon Sep 17 00:00:00 2001 From: Kelly McBride Date: Mon, 3 May 2021 15:20:30 -0700 Subject: [PATCH 1/4] Converting array_like to array-like in docstrings & comments --- pandas/core/algorithms.py | 4 ++-- pandas/core/array_algos/replace.py | 6 +++--- pandas/core/arrays/base.py | 4 ++-- pandas/core/base.py | 4 ++-- pandas/core/generic.py | 4 ++-- pandas/core/indexes/base.py | 8 ++++---- pandas/core/indexes/multi.py | 2 +- pandas/core/internals/blocks.py | 2 +- pandas/core/missing.py | 26 +++++++++++++------------- pandas/plotting/_core.py | 6 +++--- 10 files changed, 33 insertions(+), 33 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 2c4477056a112..ccc5349865ae4 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1544,13 +1544,13 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray: Input array. If `sorter` is None, then it must be sorted in ascending order, otherwise `sorter` must be an array of indices that sort it. - value : array_like + value : array-like Values to insert into `arr`. side : {'left', 'right'}, optional If 'left', the index of the first suitable location found is given. If 'right', return the last such index. If there is no suitable index, return either 0 or N (where N is the length of `self`). - sorter : 1-D array_like, optional + sorter : 1-D array-like, optional Optional array of integer indices that sort array a into ascending order. They are typically the result of argsort. diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py index 201b9fdcc51cc..3a4b01d48b4b6 100644 --- a/pandas/core/array_algos/replace.py +++ b/pandas/core/array_algos/replace.py @@ -45,21 +45,21 @@ def compare_or_regex_search( a: ArrayLike, b: Union[Scalar, Pattern], regex: bool, mask: np.ndarray ) -> Union[ArrayLike, bool]: """ - Compare two array_like inputs of the same shape or two scalar values + Compare two array-like inputs of the same shape or two scalar values Calls operator.eq or re.search, depending on regex argument. If regex is True, perform an element-wise regex matching. Parameters ---------- - a : array_like + a : array-like b : scalar or regex pattern regex : bool mask : np.ndarray[bool] Returns ------- - mask : array_like of bool + mask : array-like of bool """ if isna(b): return ~mask diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index bd01191719143..51e0bc7787634 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -826,13 +826,13 @@ def searchsorted(self, value, side="left", sorter=None): Parameters ---------- - value : array_like + value : array-like Values to insert into `self`. side : {'left', 'right'}, optional If 'left', the index of the first suitable location found is given. If 'right', return the last such index. If there is no suitable index, return either 0 or N (where N is the length of `self`). - sorter : 1-D array_like, optional + sorter : 1-D array-like, optional Optional array of integer indices that sort array a into ascending order. They are typically the result of argsort. diff --git a/pandas/core/base.py b/pandas/core/base.py index 3270e3dd82f7d..f0faa5d4037f4 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1160,13 +1160,13 @@ def factorize(self, sort: bool = False, na_sentinel: int | None = -1): Parameters ---------- - value : array_like + value : array-like Values to insert into `self`. side : {{'left', 'right'}}, optional If 'left', the index of the first suitable location found is given. If 'right', return the last such index. If there is no suitable index, return either 0 or N (where N is the length of `self`). - sorter : 1-D array_like, optional + sorter : 1-D array-like, optional Optional array of integer indices that sort `self` into ascending order. They are typically the result of ``np.argsort``. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d225ac6e6881b..c5d11894ec9ca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7489,11 +7489,11 @@ def clip( Parameters ---------- - lower : float or array_like, default None + lower : float or array-like, default None Minimum threshold value. All values below this threshold will be set to it. A missing threshold (e.g `NA`) will not clip the value. - upper : float or array_like, default None + upper : float or array-like, default None Maximum threshold value. All values above this threshold will be set to it. A missing threshold (e.g `NA`) will not clip the value. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7779335bfd3ba..f1a9a76d39a2b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6065,8 +6065,8 @@ def any(self, *args, **kwargs): Returns ------- - any : bool or array_like (if axis is specified) - A single element array_like may be converted to bool. + any : bool or array-like (if axis is specified) + A single element array-like may be converted to bool. See Also -------- @@ -6109,8 +6109,8 @@ def all(self, *args, **kwargs): Returns ------- - all : bool or array_like (if axis is specified) - A single element array_like may be converted to bool. + all : bool or array-like (if axis is specified) + A single element array-like may be converted to bool. See Also -------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 794f13bbfb6b1..dbad372a05478 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3892,7 +3892,7 @@ def maybe_droplevels(index: Index, key) -> Index: def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray: """ - Coerce the array_like indexer to the smallest integer dtype that can encode all + Coerce the array-like indexer to the smallest integer dtype that can encode all of the given categories. Parameters diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 61396fdf372d5..afd470fdff170 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1278,7 +1278,7 @@ def _unstack(self, unstacker, fill_value, new_placement): ------- blocks : list of Block New blocks of unstacked values. - mask : array_like of bool + mask : array-like of bool The mask of columns of `blocks` we should keep. """ new_values, mask = unstacker.get_new_values( diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 8849eb0670faa..424173ccc69f0 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -524,11 +524,11 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False): Parameters ---------- - xi : array_like + xi : array-like sorted 1D array of x-coordinates - yi : array_like or list of array-likes + yi : array-like or list of array-likes yi[i][j] is the j-th derivative known at xi[i] - order: None or int or array_like of ints. Default: None. + order: None or int or array-like of ints. Default: None. Specifies the degree of local polynomials. If not None, some derivatives are ignored. der : int or list @@ -546,7 +546,7 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False): Returns ------- - y : scalar or array_like + y : scalar or array-like The result, of length R or length M or M by R. """ from scipy import interpolate @@ -568,13 +568,13 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): Parameters ---------- - xi : array_like + xi : array-like A sorted list of x-coordinates, of length N. - yi : array_like + yi : array-like A 1-D array of real values. `yi`'s length along the interpolation axis must be equal to the length of `xi`. If N-D array, use axis parameter to select correct axis. - x : scalar or array_like + x : scalar or array-like Of length M. der : int, optional How many derivatives to extract; None for all potentially @@ -590,7 +590,7 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): Returns ------- - y : scalar or array_like + y : scalar or array-like The result, of length R or length M or M by R, """ @@ -609,14 +609,14 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat Parameters ---------- - xi : array_like, shape (n,) + xi : array-like, shape (n,) 1-d array containing values of the independent variable. Values must be real, finite and in strictly increasing order. - yi : array_like + yi : array-like Array containing values of the dependent variable. It can have arbitrary number of dimensions, but the length along ``axis`` (see below) must match the length of ``x``. Values must be finite. - x : scalar or array_like, shape (m,) + x : scalar or array-like, shape (m,) axis : int, optional Axis along which `y` is assumed to be varying. Meaning that for ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``. @@ -644,7 +644,7 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat tuple `(order, deriv_values)` allowing to specify arbitrary derivatives at curve ends: * `order`: the derivative order, 1 or 2. - * `deriv_value`: array_like containing derivative values, shape must + * `deriv_value`: array-like containing derivative values, shape must be the same as `y`, excluding ``axis`` dimension. For example, if `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D @@ -661,7 +661,7 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat Returns ------- - y : scalar or array_like + y : scalar or array-like The result, of shape (m,) References diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 55097054fec88..8d044011004f3 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -427,7 +427,7 @@ def hist_frame( y : label or position, optional Allows plotting of one column versus another. If not specified, all numerical columns are used. - color : str, array_like, or dict, optional + color : str, array-like, or dict, optional The color for each of the DataFrame's columns. Possible values are: - A single color string referred to by name, RGB or RGBA code, @@ -1568,7 +1568,7 @@ def scatter(self, x, y, s=None, c=None, **kwargs): y : int or str The column name or column position to be used as vertical coordinates for each point. - s : str, scalar or array_like, optional + s : str, scalar or array-like, optional The size of each point. Possible values are: - A string with the name of the column to be used for marker's size. @@ -1581,7 +1581,7 @@ def scatter(self, x, y, s=None, c=None, **kwargs): .. versionchanged:: 1.1.0 - c : str, int or array_like, optional + c : str, int or array-like, optional The color of each point. Possible values are: - A single color string referred to by name, RGB or RGBA code, From 21393402a86c4811fd74587ab93b85bb9c6e3410 Mon Sep 17 00:00:00 2001 From: Kelly McBride Date: Mon, 3 May 2021 16:37:05 -0700 Subject: [PATCH 2/4] Add docstring validation check for array_like vs. array-like --- scripts/validate_docstrings.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index d0f32bb554cf9..feab6528109b5 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -56,6 +56,7 @@ ERROR_MSGS = { "GL04": "Private classes ({mentioned_private_classes}) should not be " "mentioned in public docstrings", + "GL05": "Use 'array-like' rather than 'array_like' in docstrings.", "SA05": "{reference_name} in `See Also` section does not need `pandas` " "prefix, use {right_reference} instead.", "EX02": "Examples do not pass tests:\n{doctest_log}", @@ -258,6 +259,9 @@ def pandas_validate(func_name: str): pandas_error("EX04", imported_library=wrong_import) ) + if "array_like" in doc.raw_doc: + result["errors"].append(pandas_error("GL05")) + return result From 6e9c8704425197ff4b21ac5731a3006db93e6eda Mon Sep 17 00:00:00 2001 From: Kelly McBride Date: Wed, 12 May 2021 11:40:41 -0700 Subject: [PATCH 3/4] Add unit test for arraylike validator, and fix example in core/generic.py --- pandas/core/generic.py | 4 ++-- scripts/tests/test_validate_docstrings.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c5d11894ec9ca..a57db67183543 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8097,8 +8097,8 @@ def resample( Pass a custom function via ``apply`` - >>> def custom_resampler(array_like): - ... return np.sum(array_like) + 5 + >>> def custom_resampler(arraylike): + ... return np.sum(arraylike) + 5 ... >>> series.resample('3T').apply(custom_resampler) 2000-01-01 00:00:00 8 diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 7e4c68ddc183b..4e2d12b674aea 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -82,6 +82,12 @@ def missing_whitespace_after_comma(self): """ pass + def write_array_like_with_hyphen_not_underscore(self): + """ + In docstrings, use array-like over array_like + """ + pass + class TestValidator: def _import_path(self, klass=None, func=None): @@ -172,6 +178,11 @@ def test_bad_class(self, capsys): "missing_whitespace_after_comma", ("flake8 error: E231 missing whitespace after ',' (3 times)",), ), + ( + "BadDocstrings", + "write_array_like_with_hyphen_not_underscore", + ("Use 'array-like' rather than 'array_like' in docstrings",), + ), ], ) def test_bad_docstrings(self, capsys, klass, func, msgs): From dc8895c72bd28b9833a918efb02c0ef59dc67816 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 12 Jun 2021 10:44:09 +0100 Subject: [PATCH 4/4] make method of PandasDocstring --- scripts/validate_docstrings.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 46f97441cae8d..9b65204403612 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -197,6 +197,9 @@ def validate_pep8(self): error_count, error_code, message = error_message.split(maxsplit=2) yield error_code, message, int(error_count) + def non_hyphenated_array_like(self): + return "array_like" in self.raw_doc + def pandas_validate(func_name: str): """ @@ -257,7 +260,7 @@ def pandas_validate(func_name: str): pandas_error("EX04", imported_library=wrong_import) ) - if "array_like" in doc.raw_doc: + if doc.non_hyphenated_array_like(): result["errors"].append(pandas_error("GL05")) return result