REF/TYP: indexes (#40330)

jbrockmendel · web-flow · commit 35701510b941 · 2021-03-09T15:17:09.000-05:00
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -173,7 +173,6 @@
         RangeIndex,
         Series,
     )
-    from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
 
 
 __all__ = ["Index"]
@@ -305,7 +304,7 @@ def _outer_indexer(
 
     _typ = "index"
     _data: Union[ExtensionArray, np.ndarray]
-    _id: Optional[_Identity] = None
+    _id: Optional[object] = None
     _name: Hashable = None
     # MultiIndex.levels previously allowed setting the index name. We
     # don't allow this anymore, and raise if it happens rather than
@@ -711,7 +710,7 @@ def _reset_identity(self) -> None:
         """
         Initializes or resets ``_id`` attribute with new object.
         """
-        self._id = _Identity(object())
+        self._id = object()
 
     @final
     def _cleanup(self) -> None:
@@ -1717,7 +1716,7 @@ def sortlevel(self, level=None, ascending=True, sort_remaining=None):
 
         return self.sort_values(return_indexer=True, ascending=ascending)
 
-    def _get_level_values(self, level):
+    def _get_level_values(self, level) -> Index:
         """
         Return an Index of values for requested level.
 
@@ -2977,11 +2976,8 @@ def _union(self, other: Index, sort):
         return result
 
     @final
-    def _wrap_setop_result(self, other, result):
-        if needs_i8_conversion(self.dtype) and isinstance(result, np.ndarray):
-            self = cast("DatetimeIndexOpsMixin", self)
-            result = type(self._data)._simple_new(result, dtype=self.dtype)
-        elif is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray):
+    def _wrap_setop_result(self, other: Index, result) -> Index:
+        if is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray):
             result = Categorical(result, dtype=self.dtype)
 
         name = get_op_result_name(self, other)
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -178,6 +178,7 @@ class CategoricalIndex(NDArrayBackedExtensionIndex, accessor.PandasDelegate):
     """
 
     _typ = "categoricalindex"
+    _data_cls = Categorical
 
     @property
     def _can_hold_strings(self):
@@ -225,18 +226,6 @@ def __new__(
 
         return cls._simple_new(data, name=name)
 
-    @classmethod
-    def _simple_new(cls, values: Categorical, name: Optional[Hashable] = None):
-        assert isinstance(values, Categorical), type(values)
-        result = object.__new__(cls)
-
-        result._data = values
-        result._name = name
-        result._cache = {}
-
-        result._reset_identity()
-        return result
-
     # --------------------------------------------------------------------
 
     @doc(Index._shallow_copy)
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -5,11 +5,9 @@
 from typing import (
     TYPE_CHECKING,
     Any,
-    Hashable,
     List,
     Optional,
     Tuple,
-    Type,
     TypeVar,
     Union,
     cast,
@@ -44,7 +42,6 @@
     is_integer,
     is_list_like,
     is_period_dtype,
-    is_scalar,
 )
 from pandas.core.dtypes.concat import concat_compat
 
@@ -119,7 +116,6 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
 
     _can_hold_strings = False
     _data: Union[DatetimeArray, TimedeltaArray, PeriodArray]
-    _data_cls: Union[Type[DatetimeArray], Type[TimedeltaArray], Type[PeriodArray]]
     freq: Optional[BaseOffset]
     freqstr: Optional[str]
     _resolution_obj: Resolution
@@ -132,25 +128,6 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
     )
     _hasnans = hasnans  # for index / array -agnostic code
 
-    @classmethod
-    def _simple_new(
-        cls,
-        values: Union[DatetimeArray, TimedeltaArray, PeriodArray],
-        name: Optional[Hashable] = None,
-    ):
-        assert isinstance(values, cls._data_cls), type(values)
-
-        result = object.__new__(cls)
-        result._data = values
-        result._name = name
-        result._cache = {}
-
-        # For groupby perf. See note in indexes/base about _index_data
-        result._index_data = values._ndarray
-
-        result._reset_identity()
-        return result
-
     @property
     def _is_all_dates(self) -> bool:
         return True
@@ -219,12 +196,10 @@ def equals(self, other: Any) -> bool:
     def __contains__(self, key: Any) -> bool:
         hash(key)
         try:
-            res = self.get_loc(key)
+            self.get_loc(key)
         except (KeyError, TypeError, ValueError):
             return False
-        return bool(
-            is_scalar(res) or isinstance(res, slice) or (is_list_like(res) and len(res))
-        )
+        return True
 
     @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
     def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py
@@ -2,7 +2,9 @@
 Shared methods for Index subclasses backed by ExtensionArray.
 """
 from typing import (
+    Hashable,
     List,
+    Type,
     TypeVar,
     Union,
 )
@@ -30,7 +32,13 @@
     ABCSeries,
 )
 
-from pandas.core.arrays import IntervalArray
+from pandas.core.arrays import (
+    Categorical,
+    DatetimeArray,
+    IntervalArray,
+    PeriodArray,
+    TimedeltaArray,
+)
 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
 from pandas.core.indexers import deprecate_ndim_indexing
 from pandas.core.indexes.base import Index
@@ -352,6 +360,32 @@ class NDArrayBackedExtensionIndex(ExtensionIndex):
 
     _data: NDArrayBackedExtensionArray
 
+    _data_cls: Union[
+        Type[Categorical],
+        Type[DatetimeArray],
+        Type[TimedeltaArray],
+        Type[PeriodArray],
+    ]
+
+    @classmethod
+    def _simple_new(
+        cls,
+        values: NDArrayBackedExtensionArray,
+        name: Hashable = None,
+    ):
+        assert isinstance(values, cls._data_cls), type(values)
+
+        result = object.__new__(cls)
+        result._data = values
+        result._name = name
+        result._cache = {}
+
+        # For groupby perf. See note in indexes/base about _index_data
+        result._index_data = values._ndarray
+
+        result._reset_identity()
+        return result
+
     def _get_engine_target(self) -> np.ndarray:
         return self._data._ndarray
 
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -6,6 +6,7 @@
     TYPE_CHECKING,
     Any,
     Callable,
+    Collection,
     Hashable,
     Iterable,
     List,
@@ -98,6 +99,7 @@
 if TYPE_CHECKING:
     from pandas import (
         CategoricalIndex,
+        DataFrame,
         Series,
     )
 
@@ -323,7 +325,7 @@ def __new__(
         if len(levels) == 0:
             raise ValueError("Must pass non-zero number of levels/codes")
 
-        result = object.__new__(MultiIndex)
+        result = object.__new__(cls)
         result._cache = {}
 
         # we've already validated levels and codes, so shortcut here
@@ -503,7 +505,7 @@ def from_arrays(cls, arrays, sortorder=None, names=lib.no_default) -> MultiIndex
     @names_compat
     def from_tuples(
         cls,
-        tuples,
+        tuples: Iterable[Tuple[Hashable, ...]],
         sortorder: Optional[int] = None,
         names: Optional[Sequence[Hashable]] = None,
     ) -> MultiIndex:
@@ -546,6 +548,7 @@ def from_tuples(
             raise TypeError("Input must be a list / sequence of tuple-likes.")
         elif is_iterator(tuples):
             tuples = list(tuples)
+        tuples = cast(Collection[Tuple[Hashable, ...]], tuples)
 
         arrays: List[Sequence[Hashable]]
         if len(tuples) == 0:
@@ -560,7 +563,8 @@ def from_tuples(
         elif isinstance(tuples, list):
             arrays = list(lib.to_object_array_tuples(tuples).T)
         else:
-            arrays = zip(*tuples)
+            arrs = zip(*tuples)
+            arrays = cast(List[Sequence[Hashable]], arrs)
 
         return cls.from_arrays(arrays, sortorder=sortorder, names=names)
 
@@ -626,7 +630,7 @@ def from_product(
         return cls(levels, codes, sortorder=sortorder, names=names)
 
     @classmethod
-    def from_frame(cls, df, sortorder=None, names=None) -> MultiIndex:
+    def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex:
         """
         Make a MultiIndex from a DataFrame.
 
@@ -762,7 +766,7 @@ def __len__(self) -> int:
     # Levels Methods
 
     @cache_readonly
-    def levels(self):
+    def levels(self) -> FrozenList:
         # Use cache_readonly to ensure that self.get_locs doesn't repeatedly
         # create new IndexEngine
         # https://github.com/pandas-dev/pandas/issues/31648
@@ -1293,7 +1297,7 @@ def _formatter_func(self, tup):
         formatter_funcs = [level._formatter_func for level in self.levels]
         return tuple(func(val) for func, val in zip(formatter_funcs, tup))
 
-    def _format_data(self, name=None):
+    def _format_data(self, name=None) -> str:
         """
         Return the formatted data as a unicode string
         """
@@ -1419,10 +1423,10 @@ def format(
     # --------------------------------------------------------------------
     # Names Methods
 
-    def _get_names(self):
+    def _get_names(self) -> FrozenList:
         return FrozenList(self._names)
 
-    def _set_names(self, names, level=None, validate=True):
+    def _set_names(self, names, level=None, validate: bool = True):
         """
         Set new names on index. Each name has to be a hashable type.
 
@@ -1433,7 +1437,7 @@ def _set_names(self, names, level=None, validate=True):
         level : int, level name, or sequence of int/level names (default None)
             If the index is a MultiIndex (hierarchical), level(s) to set (None
             for all levels).  Otherwise level must be None
-        validate : boolean, default True
+        validate : bool, default True
             validate that the names match level lengths
 
         Raises
@@ -1712,7 +1716,7 @@ def unique(self, level=None):
             level = self._get_level_number(level)
             return self._get_level_values(level=level, unique=True)
 
-    def to_frame(self, index=True, name=None):
+    def to_frame(self, index=True, name=None) -> DataFrame:
         """
         Create a DataFrame with the levels of the MultiIndex as columns.
 
@@ -2109,8 +2113,8 @@ def take(
 
         na_value = -1
 
+        taken = [lab.take(indices) for lab in self.codes]
         if allow_fill:
-            taken = [lab.take(indices) for lab in self.codes]
             mask = indices == -1
             if mask.any():
                 masked = []
@@ -2119,8 +2123,6 @@ def take(
                     label_values[mask] = na_value
                     masked.append(np.asarray(label_values))
                 taken = masked
-        else:
-            taken = [lab.take(indices) for lab in self.codes]
 
         return MultiIndex(
             levels=self.levels, codes=taken, names=self.names, verify_integrity=False
@@ -2644,7 +2646,9 @@ def _get_partial_string_timestamp_match_key(self, key):
 
         return key
 
-    def _get_indexer(self, target: Index, method=None, limit=None, tolerance=None):
+    def _get_indexer(
+        self, target: Index, method=None, limit=None, tolerance=None
+    ) -> np.ndarray:
 
         # empty indexer
         if not len(target):
@@ -3521,7 +3525,7 @@ def equals(self, other: object) -> bool:
 
         return True
 
-    def equal_levels(self, other) -> bool:
+    def equal_levels(self, other: MultiIndex) -> bool:
         """
         Return True if the levels of both MultiIndex objects are the same
 
@@ -3537,7 +3541,7 @@ def equal_levels(self, other) -> bool:
     # --------------------------------------------------------------------
     # Set Methods
 
-    def _union(self, other, sort):
+    def _union(self, other, sort) -> MultiIndex:
         other, result_names = self._convert_can_do_setop(other)
 
         # We could get here with CategoricalIndex other
@@ -3579,7 +3583,7 @@ def _maybe_match_names(self, other):
                 names.append(None)
         return names
 
-    def _intersection(self, other, sort=False):
+    def _intersection(self, other, sort=False) -> MultiIndex:
         other, result_names = self._convert_can_do_setop(other)
 
         lvals = self._values
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
@@ -232,6 +232,7 @@ def __contains__(self, key) -> bool:
         hash(key)
         try:
             if is_float(key) and int(key) != key:
+                # otherwise the `key in self._engine` check casts e.g. 1.1 -> 1
                 return False
             return key in self._engine
         except (OverflowError, TypeError, ValueError):
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
@@ -67,7 +67,7 @@ class RangeIndex(Int64Index):
 
     Parameters
     ----------
-    start : int (default: 0), or other RangeIndex instance
+    start : int (default: 0), range, or other RangeIndex instance
         If int and "stop" is not given, interpreted as "stop" instead.
     stop : int (default: 0)
     step : int (default: 1)
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -122,10 +122,6 @@ def test_intercept_builtin_sum():
     tm.assert_series_equal(result2, expected)
 
 
-# @pytest.mark.parametrize("f", [max, min, sum])
-# def test_builtins_apply(f):
-
-
 @pytest.mark.parametrize("f", [max, min, sum])
 @pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]])  # Single key  # Multi-key
 def test_builtins_apply(keys, f):