From 75a15d469c1c484a0250cf8a0349625be184e737 Mon Sep 17 00:00:00 2001
From: Brock
Date: Wed, 21 Apr 2021 14:33:47 -0700
Subject: [PATCH] CLN: annotations, docstrings

---
 pandas/_libs/hashtable_class_helper.pxi.in |  5 ++-
 pandas/core/arrays/period.py               |  4 +--
 pandas/core/dtypes/dtypes.py               | 21 ++++++++-----
 pandas/core/groupby/ops.py                 |  2 +-
 pandas/core/indexes/base.py                |  9 ++----
 pandas/core/indexing.py                    | 34 ++++++++++----------
 pandas/core/resample.py                    | 36 +++++++++++++++-------
 pandas/core/sorting.py                     | 31 ++++++++++++++++---
 8 files changed, 92 insertions(+), 50 deletions(-)

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 4dc5e7516db7e..a25867c4a3b0c 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -687,7 +687,10 @@ cdef class {{name}}HashTable(HashTable):

     {{if dtype == 'int64'}}
     @cython.boundscheck(False)
-    def get_labels_groupby(self, const {{dtype}}_t[:] values):
+    def get_labels_groupby(
+        self, const {{dtype}}_t[:] values
+    ) -> tuple[ndarray, ndarray]:
+        # tuple[np.ndarray[np.intp], np.ndarray[{{dtype}}]]
         cdef:
             Py_ssize_t i, n = len(values)
             intp_t[:] labels
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index 5a9dd0e89bd65..a9c94b615f49c 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -178,8 +178,8 @@ class PeriodArray(dtl.DatelikeOps):
         "days_in_month",
         "daysinmonth",
     ]
-    _datetimelike_ops = _field_ops + _object_ops + _bool_ops
-    _datetimelike_methods = ["strftime", "to_timestamp", "asfreq"]
+    _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops
+    _datetimelike_methods: list[str] = ["strftime", "to_timestamp", "asfreq"]

     # --------------------------------------------------------------------
     # Constructors
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 84eede019251b..3d68688c21241 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -48,9 +48,14 @@
 )

 if TYPE_CHECKING:
+    from datetime import tzinfo
+
     import pyarrow

-    from pandas import Categorical
+    from pandas import (
+        Categorical,
+        Index,
+    )
     from pandas.core.arrays import (
         DatetimeArray,
         IntervalArray,
@@ -445,8 +450,8 @@ def _hash_categories(self) -> int:
             # assumes if any individual category is a tuple, then all are. ATM
             # I don't really want to support just some of the categories being
             # tuples.
-            categories = list(categories)  # breaks if a np.array of categories
-            cat_array = hash_tuples(categories)
+            cat_list = list(categories)  # breaks if a np.array of categories
+            cat_array = hash_tuples(cat_list)
         else:
             if categories.dtype == "O" and len({type(x) for x in categories}) != 1:
                 # TODO: hash_array doesn't handle mixed types. It casts
@@ -509,7 +514,7 @@ def validate_ordered(ordered: Ordered) -> None:
             raise TypeError("'ordered' must either be 'True' or 'False'")

     @staticmethod
-    def validate_categories(categories, fastpath: bool = False):
+    def validate_categories(categories, fastpath: bool = False) -> Index:
         """
         Validates that we have good categories

@@ -579,7 +584,7 @@ def update_dtype(self, dtype: str_type | CategoricalDtype) -> CategoricalDtype:
         return CategoricalDtype(new_categories, new_ordered)

     @property
-    def categories(self):
+    def categories(self) -> Index:
         """
         An ``Index`` containing the unique categories allowed.
         """
@@ -717,7 +722,7 @@ def unit(self) -> str_type:
         return self._unit

     @property
-    def tz(self):
+    def tz(self) -> tzinfo:
         """
         The timezone.
         """
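The `Index` and `tzinfo` annotations added above describe behavior that is already observable from the public API. A minimal sanity-check sketch (illustrative only, using public constructors):

```python
from datetime import tzinfo

import pandas as pd

# CategoricalDtype.categories is annotated as returning an Index.
dtype = pd.CategoricalDtype(["a", "b"])
assert isinstance(dtype.categories, pd.Index)

# DatetimeTZDtype.tz is annotated as returning a datetime.tzinfo.
tz_dtype = pd.DatetimeTZDtype(tz="UTC")
assert isinstance(tz_dtype.tz, tzinfo)
```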
""" @@ -882,7 +887,7 @@ def freq(self): return self._freq @classmethod - def _parse_dtype_strict(cls, freq): + def _parse_dtype_strict(cls, freq: str_type) -> BaseOffset: if isinstance(freq, str): if freq.startswith("period[") or freq.startswith("Period["): m = cls._match.search(freq) @@ -1136,7 +1141,7 @@ def construct_array_type(cls) -> type[IntervalArray]: return IntervalArray @classmethod - def construct_from_string(cls, string): + def construct_from_string(cls, string: str_type) -> IntervalDtype: """ attempt to construct this type from a string, raise a TypeError if its not possible diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 0a9c46f6ed069..4f9a71e5af59a 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -281,7 +281,7 @@ class BaseGrouper: whether this grouper will give sorted result or not group_keys : bool, default True mutated : bool, default False - indexer : intp array, optional + indexer : np.ndarray[np.intp], optional the indexer created by Grouper some groupers (TimeGrouper) will sort its axis and its group_info is also sorted, so need the indexer to reorder diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 58f5ca3de5dce..9b3f2d191831d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3029,9 +3029,6 @@ def _union(self, other: Index, sort): @final def _wrap_setop_result(self, other: Index, result) -> Index: - if is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray): - result = Categorical(result, dtype=self.dtype) - name = get_op_result_name(self, other) if isinstance(result, Index): if result.name != name: @@ -4028,7 +4025,7 @@ def join( return join_index, lindexer, rindexer @final - def _join_multi(self, other, how): + def _join_multi(self, other: Index, how: str_t): from pandas.core.indexes.multi import MultiIndex from pandas.core.reshape.merge import restore_dropped_levels_multijoin @@ -4273,7 +4270,7 @@ def _get_leaf_sorter(labels: list[np.ndarray]) -> np.ndarray: return join_index, left_indexer, right_indexer @final - def _join_monotonic(self, other: Index, how="left"): + def _join_monotonic(self, other: Index, how: str_t = "left"): # We only get here with matching dtypes assert other.dtype == self.dtype @@ -5527,7 +5524,7 @@ def isin(self, values, level=None) -> np.ndarray: Returns ------- - is_contained : ndarray[bool] + np.ndarray[bool] NumPy array of boolean values. See Also diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1b68ac9780ee1..04543da167fdd 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -36,7 +36,6 @@ from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ( ABCDataFrame, - ABCMultiIndex, ABCSeries, ) from pandas.core.dtypes.missing import ( @@ -53,7 +52,10 @@ is_list_like_indexer, length_of_indexer, ) -from pandas.core.indexes.api import Index +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) if TYPE_CHECKING: from pandas import ( @@ -642,7 +644,7 @@ def _get_setitem_indexer(self, key): ax = self.obj._get_axis(0) - if isinstance(ax, ABCMultiIndex) and self.name != "iloc": + if isinstance(ax, MultiIndex) and self.name != "iloc": with suppress(TypeError, KeyError, InvalidIndexError): # TypeError e.g. 
@@ -690,7 +692,7 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None):

         if (
             axis == column_axis
-            and not isinstance(self.obj.columns, ABCMultiIndex)
+            and not isinstance(self.obj.columns, MultiIndex)
             and is_list_like_indexer(key)
             and not com.is_bool_indexer(key)
             and all(is_hashable(k) for k in key)
@@ -756,7 +758,7 @@ def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
         -------
         bool
         """
-        if any(isinstance(ax, ABCMultiIndex) for ax in self.obj.axes):
+        if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
             return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
         return False

@@ -817,7 +819,7 @@ def _getitem_lowerdim(self, tup: tuple):
         ax0 = self.obj._get_axis(0)
         # ...but iloc should handle the tuple as simple integer-location
         # instead of checking it as multiindex representation (GH 13797)
-        if isinstance(ax0, ABCMultiIndex) and self.name != "iloc":
+        if isinstance(ax0, MultiIndex) and self.name != "iloc":
             with suppress(IndexingError):
                 return self._handle_lowerdim_multi_index_axis0(tup)

@@ -996,7 +998,7 @@ def _is_scalar_access(self, key: tuple) -> bool:
                 return False

             ax = self.obj.axes[i]
-            if isinstance(ax, ABCMultiIndex):
+            if isinstance(ax, MultiIndex):
                 return False

             if isinstance(k, str) and ax._supports_partial_string_indexing:
@@ -1142,7 +1144,7 @@ def _getitem_axis(self, key, axis: int):
         elif is_list_like_indexer(key):

             # an iterable multi-selection
-            if not (isinstance(key, tuple) and isinstance(labels, ABCMultiIndex)):
+            if not (isinstance(key, tuple) and isinstance(labels, MultiIndex)):

                 if hasattr(key, "ndim") and key.ndim > 1:
                     raise ValueError("Cannot index with multidimensional key")
@@ -1205,20 +1207,20 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False):
         is_int_index = labels.is_integer()
         is_int_positional = is_integer(key) and not is_int_index

-        if is_scalar(key) or isinstance(labels, ABCMultiIndex):
+        if is_scalar(key) or isinstance(labels, MultiIndex):
             # Otherwise get_loc will raise InvalidIndexError

             # if we are a label return me
             try:
                 return labels.get_loc(key)
             except LookupError:
-                if isinstance(key, tuple) and isinstance(labels, ABCMultiIndex):
+                if isinstance(key, tuple) and isinstance(labels, MultiIndex):
                     if len(key) == labels.nlevels:
                         return {"key": key}
                     raise
             except InvalidIndexError:
                 # GH35015, using datetime as column indices raises exception
-                if not isinstance(labels, ABCMultiIndex):
+                if not isinstance(labels, MultiIndex):
                     raise
             except TypeError:
                 pass
@@ -1620,7 +1622,7 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
             # GH 10360, GH 27841
             if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
                 for i, ax in zip(indexer, self.obj.axes):
-                    if isinstance(ax, ABCMultiIndex) and not (
+                    if isinstance(ax, MultiIndex) and not (
                         is_integer(i) or com.is_null_slice(i)
                     ):
                         take_split_path = True
@@ -1819,7 +1821,7 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str
         sub_indexer = list(indexer)
         pi = indexer[0]

-        multiindex_indexer = isinstance(self.obj.columns, ABCMultiIndex)
+        multiindex_indexer = isinstance(self.obj.columns, MultiIndex)

         unique_cols = value.columns.is_unique

@@ -2163,8 +2165,8 @@ def _align_frame(self, indexer, df: DataFrame):
                 # we have a multi-index and are trying to align
                 # with a particular, level GH3738
                 if (
-                    isinstance(ax, ABCMultiIndex)
-                    and isinstance(df.index, ABCMultiIndex)
+                    isinstance(ax, MultiIndex)
+                    and isinstance(df.index, MultiIndex)
                     and ax.nlevels != df.index.nlevels
                 ):
                     raise TypeError(
@@ -2428,7 +2430,7 @@ def is_nested_tuple(tup, labels) -> bool:
     for k in tup:
         if is_list_like(k) or isinstance(k, slice):
-            return isinstance(labels, ABCMultiIndex)
+            return isinstance(labels, MultiIndex)

     return False
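For context on `is_nested_tuple` just above: a tuple key counts as "nested" when one of its elements is itself list-like or a slice, and such keys are only meaningful against a MultiIndex. A sketch of the indexing pattern this guards (illustrative example, not from the patch):

```python
import pandas as pd

mi = pd.MultiIndex.from_product([["a", "b"], [1, 2]])
s = pd.Series(range(4), index=mi)

# A nested tuple key: the first element is list-like, so .loc can only
# resolve it because the axis is a MultiIndex.
print(s.loc[(["a"], [1, 2])])
```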
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
index 58003c10db9e0..91c77e987654b 100644
--- a/pandas/core/resample.py
+++ b/pandas/core/resample.py
@@ -20,6 +20,7 @@
     to_offset,
 )
 from pandas._typing import (
+    FrameOrSeries,
     T,
     TimedeltaConvertibleTypes,
     TimestampConvertibleTypes,
@@ -1345,9 +1346,15 @@ def _upsample(self, method, limit=None, fill_value=None):

         # Get the fill indexer
         indexer = memb.get_indexer(new_index, method=method, limit=limit)
-        return self._wrap_result(
-            _take_new_index(obj, indexer, new_index, axis=self.axis)
+        new_obj = _take_new_index(
+            obj,
+            indexer,
+            # error: Argument 3 to "_take_new_index" has incompatible type
+            # "Optional[Any]"; expected "Index"
+            new_index,  # type: ignore[arg-type]
+            axis=self.axis,
         )
+        return self._wrap_result(new_obj)


 class PeriodIndexResamplerGroupby(_GroupByMixin, PeriodIndexResampler):
@@ -1666,7 +1673,7 @@ def _adjust_bin_edges(self, binner, ax_values):
             bin_edges = binner.asi8
         return binner, bin_edges

-    def _get_time_delta_bins(self, ax):
+    def _get_time_delta_bins(self, ax: TimedeltaIndex):
         if not isinstance(ax, TimedeltaIndex):
             raise TypeError(
                 "axis must be a TimedeltaIndex, but got "
@@ -1789,17 +1796,24 @@ def _get_period_bins(self, ax: PeriodIndex):
         return binner, bins, labels


-def _take_new_index(obj, indexer, new_index, axis=0):
+def _take_new_index(
+    obj: FrameOrSeries, indexer: np.ndarray, new_index: Index, axis: int = 0
+) -> FrameOrSeries:
+    # indexer: np.ndarray[np.intp]

     if isinstance(obj, ABCSeries):
         new_values = algos.take_nd(obj._values, indexer)
-        return obj._constructor(new_values, index=new_index, name=obj.name)
+        # error: Incompatible return value type (got "Series", expected "FrameOrSeries")
+        return obj._constructor(  # type: ignore[return-value]
+            new_values, index=new_index, name=obj.name
+        )
     elif isinstance(obj, ABCDataFrame):
         if axis == 1:
             raise NotImplementedError("axis 1 is not supported")
-        return obj._constructor(
-            obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1)
-        )
+        new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1)
+        # error: Incompatible return value type
+        # (got "DataFrame", expected "FrameOrSeries")
+        return obj._constructor(new_mgr)  # type: ignore[return-value]
     else:
         raise ValueError("'obj' should be either a Series or a DataFrame")

@@ -1822,7 +1836,7 @@ def _get_timestamp_range_edges(
         The ending Timestamp of the range to be adjusted.
     freq : pd.DateOffset
         The dateoffset to which the Timestamps will be adjusted.
-    closed : {'right', 'left'}, default None
+    closed : {'right', 'left'}, default "left"
         Which side of bin interval is closed.
     origin : {'epoch', 'start', 'start_day'} or Timestamp, default 'start_day'
         The timestamp on which to adjust the grouping. The timezone of origin must
@@ -1892,7 +1906,7 @@ def _get_period_range_edges(
         The ending Period of the range to be adjusted.
     freq : pd.DateOffset
         The freq to which the Periods will be adjusted.
-    closed : {'right', 'left'}, default None
+    closed : {'right', 'left'}, default "left"
         Which side of bin interval is closed.
     origin : {'epoch', 'start', 'start_day'}, Timestamp, default 'start_day'
         The timestamp on which to adjust the grouping. The timezone of origin must
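`_take_new_index` is internal, but the `np.ndarray[np.intp]` indexer it consumes comes from `Index.get_indexer`, and the public path to it is upsampling. A minimal sketch of that relationship using only public API (assuming a daily-upsample example, not taken from the patch):

```python
import pandas as pd

s = pd.Series([1.0, 2.0], index=pd.to_datetime(["2021-01-01", "2021-01-03"]))
new_index = pd.date_range("2021-01-01", periods=3, freq="D")

# The fill indexer: positions into the old index, dtype np.intp.
indexer = s.index.get_indexer(new_index, method="ffill")
print(indexer)  # [0 0 1]

# Public entry point that routes through _upsample/_take_new_index.
print(s.resample("D").ffill())
```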
@@ -2042,7 +2056,7 @@ def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None):
     return new_obj


-def _asfreq_compat(index, freq):
+def _asfreq_compat(index: DatetimeIndex | PeriodIndex | TimedeltaIndex, freq):
     """
     Helper to mimic asfreq on (empty) DatetimeIndex and TimedeltaIndex.

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 71963ec4a2123..dd7ae904c866c 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -182,7 +182,7 @@ def maybe_lift(lab, size):
     return out


-def get_compressed_ids(labels, sizes):
+def get_compressed_ids(labels, sizes) -> tuple[np.ndarray, np.ndarray]:
     """
     Group_index is offsets into cartesian product of all possible labels. This
     space can be huge, so this function compresses it, by computing offsets
@@ -195,7 +195,10 @@ def get_compressed_ids(labels, sizes):

     Returns
     -------
-    tuple of (comp_ids, obs_group_ids)
+    np.ndarray[np.intp]
+        comp_ids
+    np.ndarray[np.int64]
+        obs_group_ids
     """
     ids = get_group_index(labels, sizes, sort=True, xnull=False)
     return compress_group_index(ids, sort=True)
@@ -254,7 +257,8 @@ def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull: bool):
     return [i8copy(lab[i]) for lab in labels]


-def indexer_from_factorized(labels, shape, compress: bool = True):
+def indexer_from_factorized(labels, shape, compress: bool = True) -> np.ndarray:
+    # returned ndarray is np.intp
     ids = get_group_index(labels, shape, sort=True, xnull=False)

     if not compress:
@@ -268,7 +272,7 @@ def indexer_from_factorized(labels, shape, compress: bool = True):

 def lexsort_indexer(
     keys, orders=None, na_position: str = "last", key: Callable | None = None
-):
+) -> np.ndarray:
     """
     Performs lexical sorting on a set of keys

@@ -288,6 +292,10 @@ def lexsort_indexer(
         Callable key function applied to every element in keys before sorting

         .. versionadded:: 1.0.0
+
+    Returns
+    -------
+    np.ndarray[np.intp]
     """
     from pandas.core.arrays import Categorical

@@ -656,7 +664,20 @@ def compress_group_index(group_index, sort: bool = True):
     return ensure_int64(comp_ids), ensure_int64(obs_group_ids)


-def _reorder_by_uniques(uniques, labels):
+def _reorder_by_uniques(
+    uniques: np.ndarray, labels: np.ndarray
+) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Parameters
+    ----------
+    uniques : np.ndarray[np.int64]
+    labels : np.ndarray[np.intp]
+
+    Returns
+    -------
+    np.ndarray[np.int64]
+    np.ndarray[np.intp]
+    """
     # sorter is index where elements ought to go
     sorter = uniques.argsort()
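To make the new `get_compressed_ids` return docs concrete, here is a small sketch against the internal helpers themselves (internal API, so signatures and dtypes are as documented in this patch and may change):

```python
import numpy as np
from pandas.core.sorting import compress_group_index, get_group_index

# Two label arrays over a cartesian space of 3 * 2 = 6 possible keys.
labels = [
    np.array([0, 1, 2, 1], dtype=np.intp),
    np.array([0, 0, 1, 0], dtype=np.intp),
]
ids = get_group_index(labels, (3, 2), sort=True, xnull=False)
comp_ids, obs_group_ids = compress_group_index(ids, sort=True)

print(ids)            # [0 2 5 2] -- offsets into the cartesian product
print(comp_ids)       # [0 1 2 1] -- dense group ids (documented as np.intp)
print(obs_group_ids)  # [0 2 5]   -- observed offsets (documented as np.int64)
```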