From e102d88b6c7eed4121afab3ff6b32c46f839123a Mon Sep 17 00:00:00 2001
From: Kelly McBride <kemcbride28@gmail.com>
Date: Mon, 3 May 2021 15:20:30 -0700
Subject: [PATCH 1/4] Converting array_like to array-like in docstrings &
 comments

---
 pandas/core/algorithms.py          |  4 ++--
 pandas/core/array_algos/replace.py |  6 +++---
 pandas/core/arrays/base.py         |  4 ++--
 pandas/core/base.py                |  4 ++--
 pandas/core/generic.py             |  4 ++--
 pandas/core/indexes/base.py        |  8 ++++----
 pandas/core/indexes/multi.py       |  2 +-
 pandas/core/internals/blocks.py    |  2 +-
 pandas/core/missing.py             | 26 +++++++++++++-------------
 pandas/plotting/_core.py           |  6 +++---
 10 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 2c4477056a112..ccc5349865ae4 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1544,13 +1544,13 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray:
         Input array. If `sorter` is None, then it must be sorted in
         ascending order, otherwise `sorter` must be an array of indices
         that sort it.
-    value : array_like
+    value : array-like
         Values to insert into `arr`.
     side : {'left', 'right'}, optional
         If 'left', the index of the first suitable location found is given.
         If 'right', return the last such index.  If there is no suitable
         index, return either 0 or N (where N is the length of `self`).
-    sorter : 1-D array_like, optional
+    sorter : 1-D array-like, optional
         Optional array of integer indices that sort array a into ascending
         order. They are typically the result of argsort.
 
diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py
index 201b9fdcc51cc..3a4b01d48b4b6 100644
--- a/pandas/core/array_algos/replace.py
+++ b/pandas/core/array_algos/replace.py
@@ -45,21 +45,21 @@ def compare_or_regex_search(
     a: ArrayLike, b: Union[Scalar, Pattern], regex: bool, mask: np.ndarray
 ) -> Union[ArrayLike, bool]:
     """
-    Compare two array_like inputs of the same shape or two scalar values
+    Compare two array-like inputs of the same shape or two scalar values
 
     Calls operator.eq or re.search, depending on regex argument. If regex is
     True, perform an element-wise regex matching.
 
     Parameters
     ----------
-    a : array_like
+    a : array-like
     b : scalar or regex pattern
     regex : bool
     mask : np.ndarray[bool]
 
     Returns
     -------
-    mask : array_like of bool
+    mask : array-like of bool
     """
     if isna(b):
         return ~mask
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index bd01191719143..51e0bc7787634 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -826,13 +826,13 @@ def searchsorted(self, value, side="left", sorter=None):
 
         Parameters
         ----------
-        value : array_like
+        value : array-like
             Values to insert into `self`.
         side : {'left', 'right'}, optional
             If 'left', the index of the first suitable location found is given.
             If 'right', return the last such index.  If there is no suitable
             index, return either 0 or N (where N is the length of `self`).
-        sorter : 1-D array_like, optional
+        sorter : 1-D array-like, optional
             Optional array of integer indices that sort array a into ascending
             order. They are typically the result of argsort.
 
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 3270e3dd82f7d..f0faa5d4037f4 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1160,13 +1160,13 @@ def factorize(self, sort: bool = False, na_sentinel: int | None = -1):
 
         Parameters
         ----------
-        value : array_like
+        value : array-like
             Values to insert into `self`.
         side : {{'left', 'right'}}, optional
             If 'left', the index of the first suitable location found is given.
             If 'right', return the last such index.  If there is no suitable
             index, return either 0 or N (where N is the length of `self`).
-        sorter : 1-D array_like, optional
+        sorter : 1-D array-like, optional
             Optional array of integer indices that sort `self` into ascending
             order. They are typically the result of ``np.argsort``.
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index d225ac6e6881b..c5d11894ec9ca 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7489,11 +7489,11 @@ def clip(
 
         Parameters
         ----------
-        lower : float or array_like, default None
+        lower : float or array-like, default None
             Minimum threshold value. All values below this
             threshold will be set to it. A missing
             threshold (e.g `NA`) will not clip the value.
-        upper : float or array_like, default None
+        upper : float or array-like, default None
             Maximum threshold value. All values above this
             threshold will be set to it. A missing
             threshold (e.g `NA`) will not clip the value.
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 7779335bfd3ba..f1a9a76d39a2b 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -6065,8 +6065,8 @@ def any(self, *args, **kwargs):
 
         Returns
         -------
-        any : bool or array_like (if axis is specified)
-            A single element array_like may be converted to bool.
+        any : bool or array-like (if axis is specified)
+            A single element array-like may be converted to bool.
 
         See Also
         --------
@@ -6109,8 +6109,8 @@ def all(self, *args, **kwargs):
 
         Returns
         -------
-        all : bool or array_like (if axis is specified)
-            A single element array_like may be converted to bool.
+        all : bool or array-like (if axis is specified)
+            A single element array-like may be converted to bool.
 
         See Also
         --------
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 794f13bbfb6b1..dbad372a05478 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -3892,7 +3892,7 @@ def maybe_droplevels(index: Index, key) -> Index:
 
 def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray:
     """
-    Coerce the array_like indexer to the smallest integer dtype that can encode all
+    Coerce the array-like indexer to the smallest integer dtype that can encode all
     of the given categories.
 
     Parameters
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 61396fdf372d5..afd470fdff170 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1278,7 +1278,7 @@ def _unstack(self, unstacker, fill_value, new_placement):
         -------
         blocks : list of Block
             New blocks of unstacked values.
-        mask : array_like of bool
+        mask : array-like of bool
             The mask of columns of `blocks` we should keep.
         """
         new_values, mask = unstacker.get_new_values(
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index 8849eb0670faa..424173ccc69f0 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -524,11 +524,11 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
 
     Parameters
     ----------
-    xi : array_like
+    xi : array-like
         sorted 1D array of x-coordinates
-    yi : array_like or list of array-likes
+    yi : array-like or list of array-likes
         yi[i][j] is the j-th derivative known at xi[i]
-    order: None or int or array_like of ints. Default: None.
+    order : None or int or array-like of ints, default None
         Specifies the degree of local polynomials. If not None, some
         derivatives are ignored.
     der : int or list
@@ -546,7 +546,7 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
 
     Returns
     -------
-    y : scalar or array_like
+    y : scalar or array-like
         The result, of length R or length M or M by R.
     """
     from scipy import interpolate
@@ -568,13 +568,13 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
 
     Parameters
     ----------
-    xi : array_like
+    xi : array-like
         A sorted list of x-coordinates, of length N.
-    yi : array_like
+    yi : array-like
         A 1-D array of real values.  `yi`'s length along the interpolation
         axis must be equal to the length of `xi`. If N-D array, use axis
         parameter to select correct axis.
-    x : scalar or array_like
+    x : scalar or array-like
         Of length M.
     der : int, optional
         How many derivatives to extract; None for all potentially
@@ -590,7 +590,7 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
 
     Returns
     -------
-    y : scalar or array_like
+    y : scalar or array-like
         The result, of length R or length M or M by R,
 
     """
@@ -609,14 +609,14 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat
 
     Parameters
     ----------
-    xi : array_like, shape (n,)
+    xi : array-like, shape (n,)
         1-d array containing values of the independent variable.
         Values must be real, finite and in strictly increasing order.
-    yi : array_like
+    yi : array-like
         Array containing values of the dependent variable. It can have
         arbitrary number of dimensions, but the length along ``axis``
         (see below) must match the length of ``x``. Values must be finite.
-    x : scalar or array_like, shape (m,)
+    x : scalar or array-like, shape (m,)
     axis : int, optional
         Axis along which `y` is assumed to be varying. Meaning that for
         ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
@@ -644,7 +644,7 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat
         tuple `(order, deriv_values)` allowing to specify arbitrary
         derivatives at curve ends:
         * `order`: the derivative order, 1 or 2.
-        * `deriv_value`: array_like containing derivative values, shape must
+        * `deriv_value`: array-like containing derivative values, shape must
           be the same as `y`, excluding ``axis`` dimension. For example, if
           `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with
           the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D
@@ -661,7 +661,7 @@ def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolat
 
     Returns
     -------
-    y : scalar or array_like
+    y : scalar or array-like
         The result, of shape (m,)
 
     References
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 55097054fec88..8d044011004f3 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -427,7 +427,7 @@ def hist_frame(
         y : label or position, optional
             Allows plotting of one column versus another. If not specified,
             all numerical columns are used.
-        color : str, array_like, or dict, optional
+        color : str, array-like, or dict, optional
             The color for each of the DataFrame's columns. Possible values are:
 
             - A single color string referred to by name, RGB or RGBA code,
@@ -1568,7 +1568,7 @@ def scatter(self, x, y, s=None, c=None, **kwargs):
         y : int or str
             The column name or column position to be used as vertical
             coordinates for each point.
-        s : str, scalar or array_like, optional
+        s : str, scalar or array-like, optional
             The size of each point. Possible values are:
 
             - A string with the name of the column to be used for marker's size.
@@ -1581,7 +1581,7 @@ def scatter(self, x, y, s=None, c=None, **kwargs):
 
               .. versionchanged:: 1.1.0
 
-        c : str, int or array_like, optional
+        c : str, int or array-like, optional
             The color of each point. Possible values are:
 
             - A single color string referred to by name, RGB or RGBA code,

From 21393402a86c4811fd74587ab93b85bb9c6e3410 Mon Sep 17 00:00:00 2001
From: Kelly McBride <kemcbride28@gmail.com>
Date: Mon, 3 May 2021 16:37:05 -0700
Subject: [PATCH 2/4] Add docstring validation check for array_like vs.
 array-like

---
 scripts/validate_docstrings.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py
index d0f32bb554cf9..feab6528109b5 100755
--- a/scripts/validate_docstrings.py
+++ b/scripts/validate_docstrings.py
@@ -56,6 +56,7 @@
 ERROR_MSGS = {
     "GL04": "Private classes ({mentioned_private_classes}) should not be "
     "mentioned in public docstrings",
+    "GL05": "Use 'array-like' rather than 'array_like' in docstrings.",
     "SA05": "{reference_name} in `See Also` section does not need `pandas` "
     "prefix, use {right_reference} instead.",
     "EX02": "Examples do not pass tests:\n{doctest_log}",
@@ -258,6 +259,9 @@ def pandas_validate(func_name: str):
                     pandas_error("EX04", imported_library=wrong_import)
                 )
 
+    if "array_like" in doc.raw_doc:
+        result["errors"].append(pandas_error("GL05"))
+
     return result
 
 

From 6e9c8704425197ff4b21ac5731a3006db93e6eda Mon Sep 17 00:00:00 2001
From: Kelly McBride <kemcbride28@gmail.com>
Date: Wed, 12 May 2021 11:40:41 -0700
Subject: [PATCH 3/4] Add unit test for arraylike validator, and fix example in
 core/generic.py

---
 pandas/core/generic.py                    |  4 ++--
 scripts/tests/test_validate_docstrings.py | 11 +++++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index c5d11894ec9ca..a57db67183543 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8097,8 +8097,8 @@ def resample(
 
         Pass a custom function via ``apply``
 
-        >>> def custom_resampler(array_like):
-        ...     return np.sum(array_like) + 5
+        >>> def custom_resampler(arraylike):
+        ...     return np.sum(arraylike) + 5
         ...
         >>> series.resample('3T').apply(custom_resampler)
         2000-01-01 00:00:00     8
diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py
index 7e4c68ddc183b..4e2d12b674aea 100644
--- a/scripts/tests/test_validate_docstrings.py
+++ b/scripts/tests/test_validate_docstrings.py
@@ -82,6 +82,12 @@ def missing_whitespace_after_comma(self):
         """
         pass
 
+    def write_array_like_with_hyphen_not_underscore(self):
+        """
+        In docstrings, use array-like over array_like
+        """
+        pass
+
 
 class TestValidator:
     def _import_path(self, klass=None, func=None):
@@ -172,6 +178,11 @@ def test_bad_class(self, capsys):
                 "missing_whitespace_after_comma",
                 ("flake8 error: E231 missing whitespace after ',' (3 times)",),
             ),
+            (
+                "BadDocstrings",
+                "write_array_like_with_hyphen_not_underscore",
+                ("Use 'array-like' rather than 'array_like' in docstrings",),
+            ),
         ],
     )
     def test_bad_docstrings(self, capsys, klass, func, msgs):

From dc8895c72bd28b9833a918efb02c0ef59dc67816 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <marcogorelli@protonmail.com>
Date: Sat, 12 Jun 2021 10:44:09 +0100
Subject: [PATCH 4/4] Make array_like check a method of PandasDocstring

---
 scripts/validate_docstrings.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py
index 46f97441cae8d..9b65204403612 100755
--- a/scripts/validate_docstrings.py
+++ b/scripts/validate_docstrings.py
@@ -197,6 +197,9 @@ def validate_pep8(self):
             error_count, error_code, message = error_message.split(maxsplit=2)
             yield error_code, message, int(error_count)
 
+    def non_hyphenated_array_like(self):
+        return "array_like" in self.raw_doc
+
 
 def pandas_validate(func_name: str):
     """
@@ -257,7 +260,7 @@ def pandas_validate(func_name: str):
                     pandas_error("EX04", imported_library=wrong_import)
                 )
 
-    if "array_like" in doc.raw_doc:
+    if doc.non_hyphenated_array_like():
         result["errors"].append(pandas_error("GL05"))
 
     return result