diff --git a/pandas/_libs/interval.pyi b/pandas/_libs/interval.pyi
new file mode 100644
index 0000000000000..33f59dcdabb1f
--- /dev/null
+++ b/pandas/_libs/interval.pyi
@@ -0,0 +1,129 @@
+from datetime import timedelta
+import numbers
+from typing import (
+    Any,
+    Generic,
+    TypeVar,
+    overload,
+)
+
+import numpy as np
+import numpy.typing as npt
+
+from pandas._libs import (
+    Timedelta,
+    Timestamp,
+)
+from pandas._typing import IntervalBound
+
+# endpoint types that can parametrize IntervalMixin; the ndarray member is
+# for IntervalTree (below), which parametrizes the mixin with an ndarray type
+_OrderableMixinT = TypeVar(
+    "_OrderableMixinT", int, float, Timestamp, Timedelta, npt.NDArray[np.generic]
+)
+# scalar endpoint types that can parametrize Interval
+_OrderableT = TypeVar("_OrderableT", int, float, Timestamp, Timedelta)
+
+# note: mypy doesn't support overloading properties
+# based on github.com/microsoft/python-type-stubs/pull/167
+class _LengthProperty:
+    # descriptor standing in for the ``length`` property so that its type can
+    # depend on the endpoint type of the owning IntervalMixin
+    # Timestamp endpoints: length is a Timedelta
+    @overload
+    def __get__(self, instance: IntervalMixin[Timestamp], owner: Any) -> Timedelta: ...
+    # any other endpoint type: length has the same type as the endpoints
+    @overload
+    def __get__(
+        self, instance: IntervalMixin[_OrderableMixinT], owner: Any
+    ) -> _OrderableMixinT: ...
+
+class IntervalMixin(Generic[_OrderableMixinT]):
+    @property
+    def closed_left(self) -> bool: ...
+    @property
+    def closed_right(self) -> bool: ...
+    @property
+    def open_left(self) -> bool: ...
+    @property
+    def open_right(self) -> bool: ...
+    # typed with the class's own type variable; _OrderableT is not bound here
+    @property
+    def mid(self) -> _OrderableMixinT: ...
+    length: _LengthProperty
+    @property
+    def is_empty(self) -> bool: ...
+    def _check_closed_matches(self, other: IntervalMixin, name: str = ...) -> None: ...
+
+class Interval(IntervalMixin[_OrderableT]):
+    def __init__(
+        self,
+        left: _OrderableT,
+        right: _OrderableT,
+        closed: IntervalBound = ...,
+    ) -> None: ...
+    # same Literal type that __init__ accepts for ``closed``
+    @property
+    def closed(self) -> IntervalBound: ...
+    @property
+    def left(self) -> _OrderableT: ...
+    @property
+    def right(self) -> _OrderableT: ...
+    def __str__(self) -> str: ...
+    # TODO: could return Interval with different type
+    def __add__(
+        self, y: numbers.Number | np.timedelta64 | timedelta
+    ) -> Interval[_OrderableT]: ...
+    def __radd__(
+        self, y: numbers.Number | np.timedelta64 | timedelta
+    ) -> Interval[_OrderableT]: ...
+    def __sub__(
+        self, y: numbers.Number | np.timedelta64 | timedelta
+    ) -> Interval[_OrderableT]: ...
+    def __mul__(self, y: numbers.Number) -> Interval[_OrderableT]: ...
+    def __rmul__(self, y: numbers.Number) -> Interval[_OrderableT]: ...
+    def __truediv__(self, y: numbers.Number) -> Interval[_OrderableT]: ...
+    def __floordiv__(self, y: numbers.Number) -> Interval[_OrderableT]: ...
+    def __hash__(self) -> int: ...
+    def __contains__(self: Interval[_OrderableT], key: _OrderableT) -> bool: ...
+    def overlaps(self, other: Interval[_OrderableT]) -> bool: ...
+
+VALID_CLOSED: frozenset[str]
+
+# takes npt.NDArray[Interval[_OrderableT]] and returns arrays of type
+# _OrderableT but _Orderable is not a valid dtype
+def intervals_to_interval_bounds(
+    intervals: npt.NDArray[np.object_], validate_closed: bool = ...
+) -> tuple[np.ndarray, np.ndarray, str]: ...
+
+# from pandas/_libs/intervaltree.pxi.in
+_GenericT = TypeVar("_GenericT", bound=np.generic)
+
+# error: Value of type variable "_OrderableMixinT" of "IntervalMixin"
+# cannot be "ndarray"
+class IntervalTree(
+    Generic[_GenericT],
+    IntervalMixin[npt.NDArray[_GenericT]],  # type: ignore[type-var]
+):
+    _na_count: int
+    def __init__(
+        self,
+        left: npt.NDArray[_GenericT],
+        right: npt.NDArray[_GenericT],
+        closed: IntervalBound = ...,
+        leaf_size: int = ...,
+    ) -> None: ...
+    @property
+    def left_sorter(self) -> npt.NDArray[_GenericT]: ...
+    @property
+    def right_sorter(self) -> npt.NDArray[_GenericT]: ...
+    @property
+    def is_overlapping(self) -> bool: ...
+    @property
+    def is_monotonic_increasing(self) -> bool: ...
+    def get_indexer(self, target: np.ndarray) -> npt.NDArray[np.intp]: ...
+    def get_indexer_non_unique(
+        self, target: np.ndarray
+    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
+    def __repr__(self) -> str: ...
+    def clear_mapping(self) -> None: ...
diff --git a/pandas/_typing.py b/pandas/_typing.py index c0383fe50a7e7..56c1ce7312512 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -83,10 +83,9 @@ PythonScalar = Union[str, int, float, bool] DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"] PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] -Scalar = Union[PythonScalar, PandasScalar] +Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64] IntStrT = TypeVar("IntStrT", int, str) - # timestamp and timedelta convertible types TimestampConvertibleTypes = Union[ @@ -304,3 +303,6 @@ def closed(self) -> bool: # read_xml parsers XMLParsers = Literal["lxml", "etree"] + +# on which side(s) Interval is closed +IntervalBound = Literal["left", "right", "both", "neither"] diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 8c10b62d83f9e..c57261c810663 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -759,7 +759,7 @@ def factorize( else: dtype = values.dtype values = _ensure_data(values) - na_value: Scalar + na_value: Scalar | None if original.dtype.kind in ["m", "M"]: # Note: factorize_array will cast NaT bc it has a __int__ diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index d23910c37b52b..de64c891f68d2 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -29,6 +29,7 @@ from pandas._typing import ( ArrayLike, Dtype, + IntervalBound, NpDtype, PositionalIndexer, ScalarIndexer, @@ -196,6 +197,9 @@ class IntervalArray(IntervalMixin, ExtensionArray): ndim = 1 can_hold_na = True _na_value = _fill_value = np.nan + _left: np.ndarray + _right: np.ndarray + _dtype: IntervalDtype # --------------------------------------------------------------------- # Constructors @@ -657,11 +661,7 @@ def __getitem__( if is_scalar(left) and isna(left): return self._fill_value return Interval(left, right, self.closed) - # error: Argument 1 to "ndim" has incompatible type "Union[ndarray, - 
# ExtensionArray]"; expected "Union[Union[int, float, complex, str, bytes, - # generic], Sequence[Union[int, float, complex, str, bytes, generic]], - # Sequence[Sequence[Any]], _SupportsArray]" - if np.ndim(left) > 1: # type: ignore[arg-type] + if np.ndim(left) > 1: # GH#30588 multi-dimensional indexer disallowed raise ValueError("multi-dimensional indexing not allowed") return self._shallow_copy(left, right) @@ -945,10 +945,10 @@ def _concat_same_type( ------- IntervalArray """ - closed = {interval.closed for interval in to_concat} - if len(closed) != 1: + closed_set = {interval.closed for interval in to_concat} + if len(closed_set) != 1: raise ValueError("Intervals must all be closed on the same side.") - closed = closed.pop() + closed = closed_set.pop() left = np.concatenate([interval.left for interval in to_concat]) right = np.concatenate([interval.right for interval in to_concat]) @@ -1317,7 +1317,7 @@ def overlaps(self, other): # --------------------------------------------------------------------- @property - def closed(self): + def closed(self) -> IntervalBound: """ Whether the intervals are closed on the left-side, right-side, both or neither. 
@@ -1665,8 +1665,14 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: dtype = self._left.dtype if needs_i8_conversion(dtype): - new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype) - new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype) + # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence" + new_left = type(self._left)._from_sequence( # type: ignore[attr-defined] + nc[:, 0], dtype=dtype + ) + # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence" + new_right = type(self._right)._from_sequence( # type: ignore[attr-defined] + nc[:, 1], dtype=dtype + ) else: new_left = nc[:, 0].view(dtype) new_right = nc[:, 1].view(dtype) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 21f44dbc6a1cd..42afc7405845f 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -336,7 +336,7 @@ def to_numpy( self, dtype: npt.DTypeLike | None = None, copy: bool = False, - na_value: Scalar = lib.no_default, + na_value: Scalar | libmissing.NAType | lib.NoDefault = lib.no_default, ) -> np.ndarray: """ Convert to a NumPy Array. 
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 3503b54dd478a..002def4d31e72 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -762,7 +762,7 @@ def _str_replace( return type(self)(result) def _str_match( - self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None + self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None ): if pa_version_under4p0: return super()._str_match(pat, case, flags, na) @@ -771,7 +771,9 @@ def _str_match( pat = "^" + pat return self._str_contains(pat, case, flags, na, regex=True) - def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None): + def _str_fullmatch( + self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None + ): if pa_version_under4p0: return super()._str_fullmatch(pat, case, flags, na) diff --git a/pandas/core/common.py b/pandas/core/common.py index 94fb09ddc79b3..62c2034505589 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -35,7 +35,6 @@ ArrayLike, NpDtype, RandomState, - Scalar, T, ) from pandas.util._exceptions import find_stack_level @@ -517,7 +516,7 @@ def f(x): def convert_to_list_like( - values: Scalar | Iterable | AnyArrayLike, + values: Hashable | Iterable | AnyArrayLike, ) -> list | AnyArrayLike: """ Convert list-like or scalar input to list-like. 
List, numpy and pandas array-like diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 1e39c1db1a73b..5a17513d341af 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -9,6 +9,7 @@ from typing import ( Any, Hashable, + Literal, ) import numpy as np @@ -28,6 +29,7 @@ from pandas._typing import ( Dtype, DtypeObj, + IntervalBound, npt, ) from pandas.errors import InvalidIndexError @@ -191,10 +193,12 @@ class IntervalIndex(ExtensionIndex): _typ = "intervalindex" # annotate properties pinned via inherit_names - closed: str + closed: IntervalBound is_non_overlapping_monotonic: bool closed_left: bool closed_right: bool + open_left: bool + open_right: bool _data: IntervalArray _values: IntervalArray @@ -246,7 +250,7 @@ def __new__( def from_breaks( cls, breaks, - closed: str = "right", + closed: IntervalBound = "right", name: Hashable = None, copy: bool = False, dtype: Dtype | None = None, @@ -277,7 +281,7 @@ def from_arrays( cls, left, right, - closed: str = "right", + closed: IntervalBound = "right", name: Hashable = None, copy: bool = False, dtype: Dtype | None = None, @@ -307,7 +311,7 @@ def from_arrays( def from_tuples( cls, data, - closed: str = "right", + closed: IntervalBound = "right", name: Hashable = None, copy: bool = False, dtype: Dtype | None = None, @@ -318,8 +322,10 @@ def from_tuples( # -------------------------------------------------------------------- + # error: Return type "IntervalTree[Any]" of "_engine" incompatible with return type + # "IndexEngine" in supertype "Index" @cache_readonly - def _engine(self) -> IntervalTree: + def _engine(self) -> IntervalTree: # type: ignore[override] left = self._maybe_convert_i8(self.left) right = self._maybe_convert_i8(self.right) return IntervalTree(left, right, closed=self.closed) @@ -511,7 +517,10 @@ def _maybe_convert_i8(self, key): left = self._maybe_convert_i8(key.left) right = self._maybe_convert_i8(key.right) constructor = Interval if scalar 
else IntervalIndex.from_arrays - return constructor(left, right, closed=self.closed) + # error: "object" not callable + return constructor( # type: ignore[operator] + left, right, closed=self.closed + ) if scalar: # Timestamp/Timedelta @@ -543,7 +552,7 @@ def _maybe_convert_i8(self, key): return key_i8 - def _searchsorted_monotonic(self, label, side: str = "left"): + def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"): if not self.is_non_overlapping_monotonic: raise KeyError( "can only get slices from an IntervalIndex if bounds are " @@ -663,7 +672,9 @@ def _get_indexer( # homogeneous scalar index: use IntervalTree # we should always have self._should_partial_index(target) here target = self._maybe_convert_i8(target) - indexer = self._engine.get_indexer(target.values) + # error: Argument 1 to "get_indexer" of "IntervalTree" has incompatible type + # "Union[ExtensionArray, ndarray[Any, Any]]"; expected "ndarray[Any, Any]" + indexer = self._engine.get_indexer(target.values) # type: ignore[arg-type] else: # heterogeneous scalar index: defer elementwise to get_loc # we should always have self._should_partial_index(target) here @@ -698,7 +709,12 @@ def get_indexer_non_unique( # Note: this case behaves differently from other Index subclasses # because IntervalIndex does partial-int indexing target = self._maybe_convert_i8(target) - indexer, missing = self._engine.get_indexer_non_unique(target.values) + # error: Argument 1 to "get_indexer_non_unique" of "IntervalTree" has + # incompatible type "Union[ExtensionArray, ndarray[Any, Any]]"; expected + # "ndarray[Any, Any]" [arg-type] + indexer, missing = self._engine.get_indexer_non_unique( + target.values # type: ignore[arg-type] + ) return ensure_platform_int(indexer), ensure_platform_int(missing) @@ -941,7 +957,12 @@ def _is_type_compatible(a, b) -> bool: def interval_range( - start=None, end=None, periods=None, freq=None, name: Hashable = None, closed="right" + start=None, + end=None, + 
periods=None, + freq=None, + name: Hashable = None, + closed: IntervalBound = "right", ) -> IntervalIndex: """ Return a fixed frequency IntervalIndex. diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index d8c4f3f3da765..ae948eda93e7c 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -560,7 +560,7 @@ def _format_labels( bins, precision: int, right: bool = True, include_lowest: bool = False, dtype=None ): """based on the dtype, return our labels""" - closed = "right" if right else "left" + closed: Literal["right", "left"] = "right" if right else "left" formatter: Callable[[Any], Timestamp] | Callable[[Any], Timedelta] diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 6adce02dc50f0..5b6fc78dad043 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from pandas._typing import ( @@ -16,6 +18,9 @@ from pandas.io.excel._base import BaseExcelReader +if TYPE_CHECKING: + from pandas._libs.tslibs.nattype import NaTType + @doc(storage_options=_shared_docs["storage_options"]) class ODFReader(BaseExcelReader): @@ -81,7 +86,7 @@ def get_sheet_by_name(self, name: str): self.close() raise ValueError(f"sheet {name} not found") - def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]: + def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar | NaTType]]: """ Parse an ODF Table into a list of lists """ @@ -99,12 +104,11 @@ def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]: empty_rows = 0 max_row_len = 0 - table: list[list[Scalar]] = [] - + table: list[list[Scalar | NaTType]] = [] for sheet_row in sheet_rows: sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names] empty_cells = 0 - table_row: list[Scalar] = [] + table_row: list[Scalar | NaTType] = [] for sheet_cell in sheet_cells: if 
sheet_cell.qname == table_cell_name: @@ -167,7 +171,7 @@ def _is_empty_row(self, row) -> bool: return True - def _get_cell_value(self, cell, convert_float: bool) -> Scalar: + def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType: from odf.namespaces import OFFICENS if str(cell) == "#N/A": @@ -202,7 +206,10 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar: elif cell_type == "time": stamp = pd.to_datetime(str(cell)) # error: Item "str" of "Union[float, str, NaTType]" has no attribute "time" - return stamp.time() # type: ignore[union-attr] + # error: Incompatible return value type (got "Union[Any, time]", expected + # "Union[Union[str, int, float, bool], Union[Period, Timestamp, Timedelta, + # Interval[Any]], datetime64, timedelta64]") + return stamp.time() # type: ignore[union-attr,return-value] else: self.close() raise ValueError(f"Unrecognized type {cell_type}") diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 9d0b213e44671..3faace708b6ec 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -3725,14 +3725,10 @@ def _highlight_between( Return an array of css props based on condition of data values within given range. 
""" if np.iterable(left) and not isinstance(left, str): - left = _validate_apply_axis_arg( - left, "left", None, data # type: ignore[arg-type] - ) + left = _validate_apply_axis_arg(left, "left", None, data) if np.iterable(right) and not isinstance(right, str): - right = _validate_apply_axis_arg( - right, "right", None, data # type: ignore[arg-type] - ) + right = _validate_apply_axis_arg(right, "right", None, data) # get ops with correct boundary attribution if inclusive == "both": diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 8a66a5c22caf5..8ba58ad7be0bc 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -893,7 +893,7 @@ def _clear_buffer(self) -> None: def _get_index_name( self, columns: list[Hashable] - ) -> tuple[list[Hashable] | None, list[Hashable], list[Hashable]]: + ) -> tuple[Sequence[Hashable] | None, list[Hashable], list[Hashable]]: """ Try several cases to get lines: