pandas-dev · jreback · Feb 27, 2022 · Dec 21, 2021 · Jan 15, 2022 · Jan 24, 2022
diff --git a/pandas/_libs/interval.pyi b/pandas/_libs/interval.pyi
@@ -0,0 +1,141 @@
+from __future__ import annotations
+
+import sys
+from typing import (
+    Any,
+    Generic,
+    Protocol,
+    TypeVar,
+    Union,
+    overload,
+)
+
+import numpy as np
+
+from pandas._typing import (
+    Timedelta,
+    Timestamp,
+)
+
+if sys.version_info >= (3, 8):
+    from typing import Literal
+else:
+    from typing_extensions import Literal
+
+VALID_CLOSED: frozenset[str]
+
+OrderableScalarT = TypeVar("OrderableScalarT", int, float)  # numeric bounds
+OrderableTimesT = TypeVar("OrderableTimesT", Timestamp, Timedelta)  # time-like bounds
+OrderableT = TypeVar("OrderableT", int, float, Timestamp, Timedelta)  # any bound type
+
+class IntervalMixinProtocol(Protocol): ...  # empty base of IntervalMixin
+
+class _LengthDescriptor:  # result type of `length`, chosen by the Interval's bound type
+    @overload
+    def __get__(self, instance: Interval[float], owner: Any) -> float: ...
+    @overload
+    def __get__(self, instance: Interval[int], owner: Any) -> int: ...
+    @overload
+    def __get__(self, instance: Interval[OrderableTimesT], owner: Any) -> Timedelta: ...
+
+class _MidDescriptor:  # result type of `mid`, chosen by the Interval's bound type
+    @overload
+    def __get__(self, instance: Interval[OrderableScalarT], owner: Any) -> float: ...
+    @overload
+    def __get__(self, instance: Interval[Timedelta], owner: Any) -> Timedelta: ...
+    @overload
+    def __get__(self, instance: Interval[Timestamp], owner: Any) -> Timestamp: ...
+
+class IntervalMixin(IntervalMixinProtocol):
+    @property
+    def closed_left(self) -> bool: ...
+    @property
+    def closed_right(self) -> bool: ...
+    @property
+    def open_left(self) -> bool: ...
+    @property
+    def open_right(self) -> bool: ...
+    mid: _MidDescriptor
+    length: _LengthDescriptor
+    @property
+    def is_empty(self) -> bool: ...
+    def _check_closed_matches(self, other: IntervalMixin, name: str = ...) -> None: ...
+
+class Interval(IntervalMixin, Generic[OrderableT]):  # generic over the bound type
+    @property
+    def left(self: Interval[OrderableT]) -> OrderableT: ...
+    @property
+    def right(self: Interval[OrderableT]) -> OrderableT: ...
+    @property
+    def closed(self) -> Literal["left", "right", "both", "neither"]: ...
+    def __init__(
+        self,
+        left: OrderableT,
+        right: OrderableT,
+        closed: Literal["left", "right", "both", "neither"] = ...,
+    ): ...
+    def __hash__(self) -> int: ...
+    @overload
+    def __contains__(self: Interval[OrderableTimesT], key: OrderableTimesT) -> bool: ...
+    @overload
+    def __contains__(self: Interval[int], key: Union[int, float]) -> bool: ...
+    @overload
+    def __contains__(self: Interval[float], key: Union[int, float]) -> bool: ...
+    def __repr__(self) -> str: ...
+    def __str__(self) -> str: ...
+    @overload
+    def __add__(
+        self: Interval[OrderableTimesT], y: Timedelta
+    ) -> Interval[OrderableTimesT]: ...
+    @overload
+    def __add__(self: Interval[int], y: int) -> Interval[int]: ...
+    @overload
+    def __add__(self: Interval[int], y: float) -> Interval[float]: ...
+    @overload
+    def __add__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
+    @overload
+    def __sub__(
+        self: Interval[OrderableTimesT], y: Timedelta
+    ) -> Interval[OrderableTimesT]: ...
+    @overload
+    def __sub__(self: Interval[int], y: int) -> Interval[int]: ...
+    @overload
+    def __sub__(self: Interval[int], y: float) -> Interval[float]: ...
+    @overload
+    def __sub__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
+    @overload
+    def __mul__(self: Interval[int], y: int) -> Interval[int]: ...
+    @overload
+    def __mul__(self: Interval[int], y: float) -> Interval[float]: ...
+    @overload
+    def __mul__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
+    @overload
+    def __truediv__(self: Interval[int], y: int) -> Interval[int]: ...
+    @overload
+    def __truediv__(self: Interval[int], y: float) -> Interval[float]: ...
+    @overload
+    def __truediv__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
+    @overload
+    def __floordiv__(self: Interval[int], y: int) -> Interval[int]: ...
+    @overload
+    def __floordiv__(self: Interval[int], y: float) -> Interval[float]: ...
+    @overload
+    def __floordiv__(
+        self: Interval[float], y: Union[int, float]
+    ) -> Interval[float]: ...
+    def overlaps(self: Interval[OrderableT], other: Interval[OrderableT]) -> bool: ...
+
+def intervals_to_interval_bounds(intervals: np.ndarray, validate_closed: int = ...): ...
+
+class IntervalTree(IntervalMixin):
+    def __init__(
+        self,
+        left: np.ndarray,
+        right: np.ndarray,
+        closed: Literal["left", "right", "both", "neither"] = ...,
+    ): ...
+    def get_indexer(self, target) -> np.ndarray: ...
+    def get_indexer_non_unique(self, target) -> tuple[np.ndarray, np.ndarray]: ...
+    _na_count: int
+    @property
+    def is_overlapping(self) -> bool: ...
diff --git a/pandas/_typing.py b/pandas/_typing.py
@@ -83,7 +83,7 @@
 PythonScalar = Union[str, int, float, bool]
 DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
 PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
-Scalar = Union[PythonScalar, PandasScalar]
+Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, datetime]
 IntStrT = TypeVar("IntStrT", int, str)
 
 

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -759,7 +759,7 @@ def factorize(
     else:
         dtype = values.dtype
         values = _ensure_data(values)
-        na_value: Scalar
+        na_value: Scalar | None
 
         if original.dtype.kind in ["m", "M"]:
             # Note: factorize_array will cast NaT bc it has a __int__

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -7,6 +7,7 @@
 )
 import textwrap
 from typing import (
+    Any,
     Sequence,
     TypeVar,
     Union,
@@ -197,6 +198,11 @@ class IntervalArray(IntervalMixin, ExtensionArray):
     can_hold_na = True
     _na_value = _fill_value = np.nan
 
+    # To make mypy recognize the fields
+    _left: Any
+    _right: Any
+    _dtype: Any
+
     # ---------------------------------------------------------------------
     # Constructors
 

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -336,7 +336,7 @@ def to_numpy(
         self,
         dtype: npt.DTypeLike | None = None,
         copy: bool = False,
-        na_value: Scalar = lib.no_default,
+        na_value: Scalar | lib.NoDefault | libmissing.NAType = lib.no_default,
     ) -> np.ndarray:
         """
         Convert to a NumPy Array.

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
@@ -762,7 +762,7 @@ def _str_replace(
         return type(self)(result)
 
     def _str_match(
-        self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
+        self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
     ):
         if pa_version_under4p0:
             return super()._str_match(pat, case, flags, na)
@@ -771,7 +771,9 @@ def _str_match(
             pat = "^" + pat
         return self._str_contains(pat, case, flags, na, regex=True)
 
-    def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None):
+    def _str_fullmatch(
+        self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
+    ):
         if pa_version_under4p0:
             return super()._str_fullmatch(pat, case, flags, na)
 

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -517,7 +517,7 @@ def f(x):
 
 
 def convert_to_list_like(
-    values: Scalar | Iterable | AnyArrayLike,
+    values: Scalar | Iterable | AnyArrayLike | Hashable,
 ) -> list | AnyArrayLike:
     """
     Convert list-like or scalar input to list-like. List, numpy and pandas array-like

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -9,6 +9,7 @@
 from typing import (
     Any,
     Hashable,
+    Literal,
 )
 
 import numpy as np
@@ -191,10 +192,12 @@ class IntervalIndex(ExtensionIndex):
     _typ = "intervalindex"
 
     # annotate properties pinned via inherit_names
-    closed: str
+    closed: Literal["left", "right", "both", "neither"]
     is_non_overlapping_monotonic: bool
     closed_left: bool
     closed_right: bool
+    open_left: bool
+    open_right: bool
 
     _data: IntervalArray
     _values: IntervalArray
@@ -317,9 +320,10 @@ def from_tuples(
         return cls._simple_new(arr, name=name)
 
     # --------------------------------------------------------------------
-
+    # error: Return type "IntervalTree" of "_engine" incompatible with return type
+    # "Union[IndexEngine, ExtensionEngine]" in supertype "Index"
     @cache_readonly
-    def _engine(self) -> IntervalTree:
+    def _engine(self) -> IntervalTree:  # type: ignore[override]
         left = self._maybe_convert_i8(self.left)
         right = self._maybe_convert_i8(self.right)
         return IntervalTree(left, right, closed=self.closed)
@@ -511,7 +515,10 @@ def _maybe_convert_i8(self, key):
             left = self._maybe_convert_i8(key.left)
             right = self._maybe_convert_i8(key.right)
             constructor = Interval if scalar else IntervalIndex.from_arrays
-            return constructor(left, right, closed=self.closed)
+            # error: "object" not callable
+            return constructor(
+                left, right, closed=self.closed
+            )  # type: ignore[operator]
 
         if scalar:
             # Timestamp/Timedelta
@@ -543,7 +550,7 @@ def _maybe_convert_i8(self, key):
 
         return key_i8
 
-    def _searchsorted_monotonic(self, label, side: str = "left"):
+    def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
         if not self.is_non_overlapping_monotonic:
             raise KeyError(
                 "can only get slices from an IntervalIndex if bounds are "

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -10,8 +10,8 @@
     Hashable,
     List,
     Tuple,
-    TypeVar,
     Union,
+    cast,
     overload,
 )
 import warnings
@@ -66,6 +66,7 @@
 )
 from pandas.core import algorithms
 from pandas.core.algorithms import unique
+from pandas.core.arrays.base import ExtensionArray
 from pandas.core.arrays.datetimes import (
     maybe_convert_dtype,
     objects_to_datetime64ns,
@@ -85,7 +86,8 @@
 
 ArrayConvertible = Union[List, Tuple, AnyArrayLike, "Series"]
 Scalar = Union[int, float, str]
-DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
+DatetimeScalar = Union[Scalar, datetime]
+
 DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible]
 start_caching_at = 50
 
@@ -638,7 +640,7 @@ def to_datetime(
     infer_datetime_format: bool = ...,
     origin=...,
     cache: bool = ...,
-) -> DatetimeScalar | NaTType:
+) -> Timestamp | NaTType:
     ...
 
 
@@ -1061,6 +1063,13 @@ def to_datetime(
             result = convert_listlike(arg, format, name=arg.name)
     elif is_list_like(arg):
         try:
+            # error: Argument 1 to "_maybe_cache" has incompatible type
+            # "Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray,
+            # ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...],
+            # Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]"
+            arg = cast(
+                Union[list, tuple, ExtensionArray, np.ndarray, "Series", Index], arg
+            )
             cache_array = _maybe_cache(arg, format, cache, convert_listlike)
         except OutOfBoundsDatetime:
             # caching attempts to create a DatetimeIndex, which may raise

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
@@ -1,7 +1,10 @@
 from __future__ import annotations
 
+from typing import cast
+
 import numpy as np
 
+from pandas._libs.tslibs.nattype import NaTType
 from pandas._typing import (
     FilePath,
     ReadBuffer,
@@ -81,7 +84,9 @@ def get_sheet_by_name(self, name: str):
         self.close()
         raise ValueError(f"sheet {name} not found")
 
-    def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
+    def get_sheet_data(
+        self, sheet, convert_float: bool
+    ) -> list[list[Scalar | NaTType]]:
         """
         Parse an ODF Table into a list of lists
         """
@@ -99,12 +104,12 @@ def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
         empty_rows = 0
         max_row_len = 0
 
-        table: list[list[Scalar]] = []
+        table: list[list[Scalar | NaTType]] = []
 
         for sheet_row in sheet_rows:
             sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
             empty_cells = 0
-            table_row: list[Scalar] = []
+            table_row: list[Scalar | NaTType] = []
 
             for sheet_cell in sheet_cells:
                 if sheet_cell.qname == table_cell_name:
@@ -167,7 +172,7 @@ def _is_empty_row(self, row) -> bool:
 
         return True
 
-    def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
+    def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType:
         from odf.namespaces import OFFICENS
 
         if str(cell) == "#N/A":
@@ -200,9 +205,11 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
             cell_value = cell.attributes.get((OFFICENS, "date-value"))
             return pd.to_datetime(cell_value)
         elif cell_type == "time":
-            stamp = pd.to_datetime(str(cell))
-            # error: Item "str" of "Union[float, str, NaTType]" has no attribute "time"
-            return stamp.time()  # type: ignore[union-attr]
+            # cast needed because `pd.to_datetime` can return NaTType,
+            # but we know this is a valid time
+            stamp = cast(pd.Timestamp, pd.to_datetime(str(cell)))
+            # cast needed here because Scalar doesn't include datetime.time
+            return cast(Scalar, stamp.time())
         else:
             self.close()
             raise ValueError(f"Unrecognized type {cell_type}")