pandas-dev · jbrockmendel · Apr 20, 2021 · Apr 20, 2021 · Apr 24, 2021 · Apr 24, 2021
diff --git a/pandas/_libs/interval.pyi b/pandas/_libs/interval.pyi
@@ -0,0 +1,89 @@
+import numpy as np
+
+from pandas import (
+    Timedelta,
+    Timestamp,
+)
+
+VALID_CLOSED: frozenset[str]
+
+
+class IntervalMixin:  # base class of the Interval and IntervalTree stubs below
+    closed: str  # presumably a member of VALID_CLOSED -- TODO confirm
+
+    @property
+    def closed_left(self) -> bool: ...
+
+    @property
+    def closed_right(self) -> bool: ...
+
+    @property
+    def open_left(self) -> bool: ...
+
+    @property
+    def open_right(self) -> bool: ...
+
+    @property
+    def mid(self): ...  # TODO: return type not yet annotated
+
+    @property
+    def length(self): ...  # TODO: return type not yet annotated
+
+    @property
+    def is_empty(self): ...  # TODO: return type not yet annotated
+
+    def _check_closed_matches(self, other, name: str = ...) -> None: ...
+
+
+class Interval(IntervalMixin):  # endpoints are typed; arithmetic is still untyped
+    left: int | float | Timestamp | Timedelta
+    right: int | float | Timestamp | Timedelta
+
+    def __init__(self, left, right, closed: str = ...): ...  # TODO: annotate left/right
+
+    def __contains__(self, key) -> bool: ...
+    def __str__(self) -> str: ...
+    def __add__(self, y): ...  # TODO: annotate y/return for the arithmetic dunders
+    def __sub__(self, y): ...
+    def __mul__(self, y): ...
+    def __truediv__(self, y): ...
+    def __floordiv__(self, y): ...
+
+    def overlaps(self, other: Interval) -> bool: ...
+
+
+def intervals_to_interval_bounds(
+    intervals: np.ndarray,
+    validate_closed: bool = ...,
+) -> tuple[np.ndarray, np.ndarray, str]: ...  # presumably (left, right, closed)
+
+
+class IntervalTree(IntervalMixin):
+    def __init__(self, left, right, closed=..., leaf_size=...): ...  # TODO: annotate
+
+    @property
+    def left_sorter(self) -> np.ndarray: ...  # np.ndarray[np.intp]
+
+    @property
+    def right_sorter(self) -> np.ndarray: ...  # np.ndarray[np.intp]
+
+    @property
+    def is_overlapping(self) -> bool: ...
+
+    @property
+    def is_monotonic_increasing(self) -> bool: ...
+
+    def get_indexer(
+        self,
+        target: np.ndarray,  # scalar_t[:]
+    ) -> np.ndarray: ...  # np.ndarray[np.intp]
+
+    def get_indexer_non_unique(
+        self,
+        target: np.ndarray,  # scalar_t[:]
+    ) -> tuple[
+        np.ndarray,  # np.ndarray[np.intp]
+        np.ndarray,  # np.ndarray[np.intp]
+    ]: ...
+
+    def clear_mapping(self) -> None: ...
diff --git a/pandas/_typing.py b/pandas/_typing.py
@@ -1,5 +1,7 @@
 from datetime import (
+    date,
     datetime,
+    time,
     timedelta,
     tzinfo,
 )
@@ -86,10 +88,10 @@
 
 # scalars
 
-PythonScalar = Union[str, int, float, bool]
+PythonScalar = Optional[Union[str, int, float, complex, bool, date, time, timedelta]]
 DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
 PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
-Scalar = Union[PythonScalar, PandasScalar]
+Scalar = Union[PythonScalar, PandasScalar, np.number, np.datetime64, np.timedelta64]
 
 # timestamp and timedelta convertible types
 

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -243,7 +243,10 @@ def contains(cat, key, container):
         return any(loc_ in container for loc_ in loc)
 
 
-class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin):
+# error: Cannot determine type of 'repeat' in base class 'ExtensionArray'
+class Categorical(  # type: ignore[misc]
+    NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin
+):
     """
     Represent a categorical variable in classic R / S-plus fashion.
 

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -141,7 +141,10 @@ class InvalidComparison(Exception):
     pass
 
 
-class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray):
+# error: Cannot determine type of 'repeat' in base class 'ExtensionArray'
+class DatetimeLikeArrayMixin(  # type: ignore[misc]
+    OpsMixin, NDArrayBackedExtensionArray
+):
     """
     Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray
 

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -148,7 +148,8 @@ def f(self):
     return property(f)
 
 
-class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
+# error: Cannot determine type of 'repeat' in base class 'ExtensionArray'
+class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):  # type: ignore[misc]
     """
     Pandas ExtensionArray for tz-naive or tz-aware datetime data.
 

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -7,6 +7,8 @@
 )
 import textwrap
 from typing import (
+    TYPE_CHECKING,
+    Generic,
     Sequence,
     TypeVar,
     cast,
@@ -84,7 +86,14 @@
     unpack_zerodim_and_defer,
 )
 
+if TYPE_CHECKING:
+    from pandas.core.arrays import (
+        DatetimeArray,
+        TimedeltaArray,
+    )
+
 IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray")
+S = TypeVar("S", np.ndarray, "DatetimeArray", "TimedeltaArray")
 
 _interval_shared_docs: dict[str, str] = {}
 
@@ -186,11 +195,15 @@
         ),
     }
 )
-class IntervalArray(IntervalMixin, ExtensionArray):
+class IntervalArray(IntervalMixin, ExtensionArray, Generic[S]):
     ndim = 1
     can_hold_na = True
     _na_value = _fill_value = np.nan
 
+    _dtype: IntervalDtype
+    _left: S
+    _right: S
+
     # ---------------------------------------------------------------------
     # Constructors
 
@@ -586,7 +599,12 @@ def _validate(self):
                 "location both left and right sides"
             )
             raise ValueError(msg)
-        if not (self._left[left_mask] <= self._right[left_mask]).all():
+        # error: Item "bool" of "Union[Any, bool]" has no attribute "all"
+        if not (  # type: ignore[union-attr]
+            # error: Unsupported operand types for <= ("Timestamp" and "Timedelta")
+            self._left[left_mask]  # type: ignore[operator]
+            <= self._right[left_mask]
+        ).all():
             msg = "left side of interval must be <= right side"
             raise ValueError(msg)
 
@@ -930,9 +948,9 @@ def shift(
             from pandas import Index
 
             fill_value = Index(self._left, copy=False)._na_value
-            empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1))
+            empty = type(self).from_breaks([fill_value] * (empty_len + 1))
         else:
-            empty = self._from_sequence([fill_value] * empty_len)
+            empty = type(self)._from_sequence([fill_value] * empty_len)
 
         if periods > 0:
             a = empty
@@ -1355,15 +1373,31 @@ def is_non_overlapping_monotonic(self) -> bool:
         # at a point when both sides of intervals are included
         if self.closed == "both":
             return bool(
-                (self._right[:-1] < self._left[1:]).all()
-                or (self._left[:-1] > self._right[1:]).all()
+                # error: Item "bool" of "Union[Any, bool]" has no attribute "all"
+                # error: Unsupported operand types for > ("Timedelta" and "Timestamp")
+                (  # type: ignore[union-attr]
+                    self._right[:-1] < self._left[1:]  # type: ignore[operator]
+                ).all()
+                # error: Item "bool" of "Union[Any, bool]" has no attribute "all"
+                # error: Unsupported operand types for > ("Timedelta" and "Timestamp")
+                or (  # type: ignore[union-attr]
+                    self._left[:-1] > self._right[1:]  # type: ignore[operator]
+                ).all()
             )
 
         # non-strict inequality when closed != 'both'; at least one side is
         # not included in the intervals, so equality does not imply overlapping
         return bool(
-            (self._right[:-1] <= self._left[1:]).all()
-            or (self._left[:-1] >= self._right[1:]).all()
+            # error: Item "bool" of "Union[Any, bool]" has no attribute "all"
+            # error: Unsupported operand types for <= ("Timestamp" and "Timedelta")
+            (  # type: ignore[union-attr]
+                self._right[:-1] <= self._left[1:]  # type: ignore[operator]
+            ).all()
+            # error: Item "bool" of "Union[Any, bool]" has no attribute "all"
+            # error: Unsupported operand types for >= ("Timedelta" and "Timestamp")
+            or (  # type: ignore[union-attr]
+                self._left[:-1] >= self._right[1:]  # type: ignore[operator]
+            ).all()
         )
 
     # ---------------------------------------------------------------------

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -215,7 +215,7 @@ def to_numpy(  # type: ignore[override]
         self,
         dtype: NpDtype | None = None,
         copy: bool = False,
-        na_value: Scalar = lib.no_default,
+        na_value: Scalar | lib.NoDefault = lib.no_default,
     ) -> np.ndarray:
         """
         Convert to a NumPy Array.

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -27,7 +27,8 @@
 from pandas.core.strings.object_array import ObjectStringArrayMixin
 
 
-class PandasArray(
+# error: Cannot determine type of 'repeat' in base class 'ExtensionArray'
+class PandasArray(  # type: ignore[misc]
     OpsMixin,
     NDArrayBackedExtensionArray,
     NDArrayOperatorsMixin,

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -228,7 +228,8 @@ class BaseStringArray(ExtensionArray):
     pass
 
 
-class StringArray(BaseStringArray, PandasArray):
+# error: Cannot determine type of 'repeat' in base class 'ExtensionArray'
+class StringArray(BaseStringArray, PandasArray):  # type: ignore[misc]
     """
     Extension array for string data.
 
@@ -311,8 +312,6 @@ def __init__(self, values, copy=False):
         values = extract_array(values)
 
         super().__init__(values, copy=copy)
-        # error: Incompatible types in assignment (expression has type "StringDtype",
-        # variable has type "PandasDtype")
         NDArrayBacked.__init__(self, self._ndarray, StringDtype(storage="python"))
         if not isinstance(values, type(self)):
             self._validate()

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -1101,7 +1101,8 @@ def _memory_usage(self, deep: bool = False) -> int:
         return v
 
     @doc(
-        algorithms.factorize,
+        # error: Cannot determine type of 'factorize'
+        algorithms.factorize,  # type: ignore[has-type]
         values="",
         order="",
         size_hint="",

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -30,7 +30,6 @@
 from pandas._typing import (
     AnyArrayLike,
     NpDtype,
-    Scalar,
     T,
 )
 from pandas.compat import np_version_under1p18
@@ -487,7 +486,7 @@ def f(x):
 
 
 def convert_to_list_like(
-    values: Scalar | Iterable | AnyArrayLike,
+    values: Any,
 ) -> list | AnyArrayLike:
     """
     Convert list-like or scalar input to list-like. List, numpy and pandas array-like

diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py
@@ -605,6 +605,12 @@ def __repr__(self) -> str:
 
     def evaluate(self):
         """create and return the numexpr condition and filter"""
+        if self.terms is None:
+            raise ValueError(
+                f"cannot process expression [{self.expr}], [{self}] "
+                "is not a valid condition"
+            )
+
         try:
             self.condition = self.terms.prune(ConditionBinOp)
         except AttributeError as err:

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -209,7 +209,11 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
         # GH#36541: can't fill array directly with pd.NaT
         # > np.empty(10, dtype="datetime64[64]").fill(pd.NaT)
         # ValueError: cannot convert float NaN to integer
-        value = dtype.type("NaT", "ns")
+        # error: Incompatible types in assignment (expression has type
+        # "Union[generic, Any]", variable has type "Union[Union[str, int, float,
+        # complex, bool, date, time, timedelta, None], Union[Period, Timestamp,
+        # Timedelta, Interval], number[Any], datetime64, timedelta64]")
+        value = dtype.type("NaT", "ns")  # type: ignore[assignment]
     elif isinstance(value, Timestamp):
         if value.tz is None:
             value = value.to_datetime64()

diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -36,6 +36,7 @@
 )
 from pandas.errors import InvalidIndexError
 from pandas.util._decorators import (
+    Appender,
     cache_readonly,
     doc,
 )
@@ -263,7 +264,8 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
     # --------------------------------------------------------------------
     # methods that dispatch to DatetimeArray and wrap result
 
-    @doc(DatetimeArray.strftime)
+    # error: Cannot determine type of 'strftime'
+    @Appender(DatetimeArray.strftime.__doc__)  # type: ignore[has-type]
     def strftime(self, date_format) -> Index:
         arr = self._data.strftime(date_format)
         return Index(arr, name=self.name)
@@ -273,12 +275,14 @@ def tz_convert(self, tz) -> DatetimeIndex:
         arr = self._data.tz_convert(tz)
         return type(self)._simple_new(arr, name=self.name)
 
-    @doc(DatetimeArray.tz_localize)
+    # error: Cannot determine type of 'tz_localize'
+    @doc(DatetimeArray.tz_localize)  # type: ignore[has-type]
     def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeIndex:
         arr = self._data.tz_localize(tz, ambiguous, nonexistent)
         return type(self)._simple_new(arr, name=self.name)
 
-    @doc(DatetimeArray.to_period)
+    # error: Cannot determine type of 'to_period'
+    @doc(DatetimeArray.to_period)  # type: ignore[has-type]
     def to_period(self, freq=None) -> PeriodIndex:
         from pandas.core.indexes.api import PeriodIndex
 

diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py
@@ -282,7 +282,9 @@ def __getitem__(self, key):
                 return type(self)(result, name=self._name)
             # Unpack to ndarray for MPL compat
 
-            result = result._ndarray
+            # error: Item "IntervalArray" of "Union[Any, IntervalArray,
+            # NDArrayBackedExtensionArray]" has no attribute "_ndarray"
+            result = result._ndarray  # type: ignore[union-attr]
 
         # Includes cases where we get a 2D ndarray back for MPL compat
         deprecate_ndim_indexing(result)
@@ -438,8 +440,11 @@ class NDArrayBackedExtensionIndex(ExtensionIndex):
 
     _data: NDArrayBackedExtensionArray
 
+    # error: Argument 1 of "_simple_new" is incompatible with supertype
+    # "ExtensionIndex"; supertype defines the argument type as
+    # "Union[IntervalArray, NDArrayBackedExtensionArray]"
     @classmethod
-    def _simple_new(
+    def _simple_new(  # type: ignore[override]
         cls,
         values: NDArrayBackedExtensionArray,
         name: Hashable = None,