CLN: docstring cleanup (#40713)

jbrockmendel · web-flow · commit 65860fa94ec0 · 2021-04-01T18:35:24.000-04:00
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -154,9 +154,9 @@ def group_cumprod_float64(float64_t[:, ::1] out,
 
     Parameters
     ----------
-    out : float64 array
+    out : np.ndarray[np.float64, ndim=2]
         Array to store cumprod in.
-    values : float64 array
+    values : np.ndarray[np.float64, ndim=2]
         Values to take cumprod of.
     labels : np.ndarray[np.intp]
         Labels to group by.
@@ -211,9 +211,9 @@ def group_cumsum(numeric[:, ::1] out,
 
     Parameters
     ----------
-    out : array
+    out : np.ndarray[ndim=2]
         Array to store cumsum in.
-    values : array
+    values : np.ndarray[ndim=2]
         Values to take cumsum of.
     labels : np.ndarray[np.intp]
         Labels to group by.
@@ -329,12 +329,15 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[intp_t] labels,
 
     Parameters
     ----------
-    out : array of int64_t values which this method will write its results to
-        Missing values will be written to with a value of -1
+    out : np.ndarray[np.int64]
+        Values into which this method will write its results.
     labels : np.ndarray[np.intp]
         Array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`.
-    mask : array of int64_t values where a 1 indicates a missing value
+    values : np.ndarray[np.uint8]
+        Containing the truth value of each element.
+    mask : np.ndarray[np.uint8]
+        Indicating whether a value is na or not.
     direction : {'ffill', 'bfill'}
         Direction for fill to be applied (forwards or backwards, respectively)
     limit : Consecutive values to fill before stopping, or -1 for no limit
@@ -396,12 +399,15 @@ def group_any_all(uint8_t[::1] out,
 
     Parameters
     ----------
-    out : array of values which this method will write its results to
+    out : np.ndarray[np.uint8]
+        Values into which this method will write its results.
     labels : np.ndarray[np.intp]
         Array containing unique label for each group, with its
         ordering matching up to the corresponding record in `values`
-    values : array containing the truth value of each element
-    mask : array indicating whether a value is na or not
+    values : np.ndarray[np.uint8]
+        Containing the truth value of each element.
+    mask : np.ndarray[np.uint8]
+        Indicating whether a value is na or not.
     val_test : {'any', 'all'}
         String object dictating whether to use any or all truth testing
     skipna : bool
@@ -721,14 +727,15 @@ def group_quantile(ndarray[float64_t] out,
 
     Parameters
     ----------
-    out : ndarray
+    out : np.ndarray[np.float64]
         Array of aggregated values that will be written to.
     labels : ndarray[np.intp]
         Array containing the unique group labels.
     values : ndarray
         Array containing the values to apply the function against.
     q : float
         The quantile value to search for.
+    interpolation : {'linear', 'lower', 'higher', 'nearest', 'midpoint'}
 
     Notes
     -----
@@ -1048,8 +1057,9 @@ def group_rank(float64_t[:, ::1] out,
 
     Parameters
     ----------
-    out : array of float64_t values which this method will write its results to
-    values : array of rank_t values to be ranked
+    out : np.ndarray[np.float64, ndim=2]
+        Values to which this method will write its results.
+    values : np.ndarray of rank_t values to be ranked
     labels : np.ndarray[np.intp]
         Array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`
@@ -1058,8 +1068,7 @@ def group_rank(float64_t[:, ::1] out,
         groupby functions.
     is_datetimelike : bool
         True if `values` contains datetime-like entries.
-    ties_method : {'average', 'min', 'max', 'first', 'dense'}, default
-        'average'
+    ties_method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
         * average: average rank of group
         * min: lowest rank in group
         * max: highest rank in group
@@ -1120,9 +1129,9 @@ cdef group_min_max(groupby_t[:, ::1] out,
 
     Parameters
     ----------
-    out : array
+    out : np.ndarray[groupby_t, ndim=2]
         Array to store result in.
-    counts : int64 array
+    counts : np.ndarray[np.int64]
         Input as a zeroed array, populated by group sizes during algorithm
     values : array
         Values to find column-wise min/max of.
@@ -1241,9 +1250,9 @@ def group_cummin_max(groupby_t[:, ::1] out,
 
     Parameters
     ----------
-    out : array
+    out : np.ndarray[groupby_t, ndim=2]
         Array to store cummin/max in.
-    values : array
+    values : np.ndarray[groupby_t, ndim=2]
         Values to take cummin/max of.
     labels : np.ndarray[np.intp]
         Labels to group by.
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
@@ -385,7 +385,7 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):
 
     Returns
     -------
-    iter : iterator of (int, slice or array)
+    list[tuple[int, slice | np.ndarray]]
     """
     # There's blkno in this function's name because it's used in block &
     # blockno handling.
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -916,7 +916,7 @@ def indices_fast(ndarray[intp_t] index, const int64_t[:] labels, list keys,
     """
     Parameters
     ----------
-    index : ndarray
+    index : ndarray[intp]
     labels : ndarray[int64]
     keys : list
     sorted_labels : list[ndarray[int64]]
diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx
@@ -52,7 +52,7 @@ def unstack(reshape_t[:, :] values, const uint8_t[:] mask,
     stride : int
     length : int
     width : int
-    new_values : typed ndarray
+    new_values : np.ndarray[reshape_t]
         result array
     new_mask : np.ndarray[bool]
         result mask
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
@@ -316,7 +316,7 @@ def datetime_to_datetime64(ndarray[object] values):
 
     Returns
     -------
-    result : ndarray[int64_t]
+    result : ndarray[datetime64ns]
     inferred_tz : tzinfo or None
     """
     cdef:
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -315,6 +315,10 @@ def array_to_timedelta64(ndarray[object] values, str unit=None, str errors="rais
     """
     Convert an ndarray to an array of timedeltas. If errors == 'coerce',
     coerce non-convertible objects to NaT. Otherwise, raise.
+
+    Returns
+    -------
+    np.ndarray[timedelta64ns]
     """
 
     cdef:
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -943,7 +943,7 @@ def duplicated(values: ArrayLike, keep: Union[str, bool] = "first") -> np.ndarra
 
     Returns
     -------
-    duplicated : ndarray
+    duplicated : ndarray[bool]
     """
     values, _ = _ensure_data(values)
     ndtype = values.dtype.name
@@ -1631,7 +1631,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
         number of periods
     axis : {0, 1}
         axis to shift on
-    stacklevel : int
+    stacklevel : int, default 3
         The stacklevel for the lost dtype warning.
 
     Returns
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1452,7 +1452,7 @@ def isna(self):
 
         Returns
         -------
-        a boolean array of whether my values are null
+        np.ndarray[bool] of whether my values are null
 
         See Also
         --------
@@ -1474,7 +1474,7 @@ def notna(self):
 
         Returns
         -------
-        a boolean array of whether my values are not null
+        np.ndarray[bool] of whether my values are not null
 
         See Also
         --------
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -1031,7 +1031,7 @@ def to_pydatetime(self) -> np.ndarray:
 
         Returns
         -------
-        datetimes : ndarray
+        datetimes : ndarray[object]
         """
         return ints_to_pydatetime(self.asi8, tz=self.tz)
 
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -853,7 +853,7 @@ def to_pytimedelta(self) -> np.ndarray:
 
         Returns
         -------
-        datetimes : ndarray
+        timedeltas : ndarray[object]
         """
         return tslibs.ints_to_pytimedelta(self.asi8)
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -8191,7 +8191,7 @@ def _gotitem(
         Parameters
         ----------
         key : string / list of selections
-        ndim : 1,2
+        ndim : {1, 2}
             requested ndim of result
         subset : object, default None
             subset to act on
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1656,9 +1656,9 @@ def _is_label_reference(self, key, axis=0) -> bool_t:
 
         Parameters
         ----------
-        key: str
+        key : str
             Potential label name
-        axis: int, default 0
+        axis : int, default 0
             Axis perpendicular to the axis that labels are associated with
             (0 means search for column labels, 1 means search for index labels)
 
@@ -1687,14 +1687,14 @@ def _is_label_or_level_reference(self, key: str, axis: int = 0) -> bool_t:
 
         Parameters
         ----------
-        key: str
+        key : str
             Potential label or level name
-        axis: int, default 0
+        axis : int, default 0
             Axis that levels are associated with (0 for index, 1 for columns)
 
         Returns
         -------
-        is_label_or_level: bool
+        bool
         """
         return self._is_level_reference(key, axis=axis) or self._is_label_reference(
             key, axis=axis
@@ -1710,9 +1710,9 @@ def _check_label_or_level_ambiguity(self, key, axis: int = 0) -> None:
 
         Parameters
         ----------
-        key: str or object
+        key : str or object
             Label or level name.
-        axis: int, default 0
+        axis : int, default 0
             Axis that levels are associated with (0 for index, 1 for columns).
 
         Raises
@@ -1760,14 +1760,14 @@ def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray:
 
         Parameters
         ----------
-        key: str
+        key : str
             Label or level name.
-        axis: int, default 0
+        axis : int, default 0
             Axis that levels are associated with (0 for index, 1 for columns)
 
         Returns
         -------
-        values: np.ndarray
+        values : np.ndarray
 
         Raises
         ------
diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py
@@ -41,7 +41,7 @@ def recode_for_groupby(
 
     Returns
     -------
-    New Categorical
+    Categorical
         If sort=False, the new categories are set to the order of
         appearance in codes (unless ordered=True, in which case the
         original order is preserved), followed by any unrepresented
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -808,7 +808,7 @@ class BinGrouper(BaseGrouper):
     binlabels : the label list
     filter_empty : bool, default False
     mutated : bool, default False
-    indexer : a intp array
+    indexer : np.ndarray[np.intp]
 
     Examples
     --------
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2412,7 +2412,7 @@ def isna(self) -> np.ndarray:
 
         Returns
         -------
-        numpy.ndarray
+        numpy.ndarray[bool]
             A boolean array of whether my values are NA.
 
         See Also
@@ -2470,7 +2470,7 @@ def notna(self) -> np.ndarray:
 
         Returns
         -------
-        numpy.ndarray
+        numpy.ndarray[bool]
             Boolean array to indicate which entries are not NA.
 
         See Also
@@ -5499,7 +5499,7 @@ def isin(self, values, level=None):
 
         Returns
         -------
-        is_contained : ndarray
+        is_contained : ndarray[bool]
             NumPy array of boolean values.
 
         See Also
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -3999,7 +3999,7 @@ def _gotitem(self, key, ndim, subset=None) -> Series:
         Parameters
         ----------
         key : string / list of selections
-        ndim : 1,2
+        ndim : {1, 2}
             Requested ndim of result.
         subset : object, default None
             Subset to act on.
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -576,7 +576,7 @@ def _adjust_to_origin(arg, origin, unit):
         date to be adjusted
     origin : 'julian' or Timestamp
         origin offset for the arg
-    unit : string
+    unit : str
         passed unit from to_datetime, must be 'D'
 
     Returns
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -239,7 +239,7 @@ def _gotitem(self, key, ndim, subset=None):
         Parameters
         ----------
         key : str / list of selections
-        ndim : 1,2
+        ndim : {1, 2}
             requested ndim of result
         subset : object, default None
             subset to act on
diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
@@ -431,21 +431,21 @@ class ExcelFormatter:
     ----------
     df : DataFrame or Styler
     na_rep: na representation
-    float_format : string, default None
-            Format string for floating point numbers
+    float_format : str, default None
+        Format string for floating point numbers
     cols : sequence, optional
         Columns to write
     header : bool or sequence of str, default True
         Write out column names. If a list of string is given it is
         assumed to be aliases for the column names
     index : bool, default True
         output row names (index)
-    index_label : string or sequence, default None
-            Column label for index column(s) if desired. If None is given, and
-            `header` and `index` are True, then the index names are used. A
-            sequence should be given if the DataFrame uses MultiIndex.
+    index_label : str or sequence, default None
+        Column label for index column(s) if desired. If None is given, and
+        `header` and `index` are True, then the index names are used. A
+        sequence should be given if the DataFrame uses MultiIndex.
     merge_cells : bool, default False
-            Format MultiIndex and Hierarchical Rows as merged cells.
+        Format MultiIndex and Hierarchical Rows as merged cells.
     inf_rep : str, default `'inf'`
         representation for np.inf values (which aren't representable in Excel)
         A `'-'` sign will be added in front of -inf.