Skip to content

Typeinterval part2 #46098

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Feb 27, 2022
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9e57d6d
fix column_arrays for array manager
Dr-Irv Dec 21, 2021
2abbc57
merge with upstream/main
Dr-Irv Jan 15, 2022
99158b1
Merge remote-tracking branch 'upstream/main'
Dr-Irv Jan 24, 2022
0f4130d
Merge remote-tracking branch 'upstream/main'
Dr-Irv Feb 4, 2022
24ecfe2
Merge remote-tracking branch 'upstream/main'
Dr-Irv Feb 8, 2022
64f00d5
Merge remote-tracking branch 'upstream/main'
Dr-Irv Feb 9, 2022
1ee6e00
Merge remote-tracking branch 'upstream/main'
Dr-Irv Feb 19, 2022
34dc181
fix up typing to eventually support Interval typing
Dr-Irv Feb 20, 2022
a547800
fix imports in core/tools/datetimes.py
Dr-Irv Feb 20, 2022
3a3bfea
fix time in odfreader
Dr-Irv Feb 20, 2022
5edf5c9
pandas/core/arrays/masked.py
Dr-Irv Feb 21, 2022
7c4cd5c
use cast instead of new code in odfreader
Dr-Irv Feb 21, 2022
649ef07
fix odfreader
Dr-Irv Feb 21, 2022
1c1ba2c
interval pyi
Dr-Irv Feb 21, 2022
35c2af0
use cast in odfreader
Dr-Irv Feb 21, 2022
9769356
use cast in odfreader
Dr-Irv Feb 21, 2022
607b367
fixes for mid, length to mixin, change use of protocol
Dr-Irv Feb 21, 2022
f9f0820
misc cleanup from twoertwein
Dr-Irv Feb 22, 2022
52f376f
remove generic from IntervalTree
Dr-Irv Feb 22, 2022
40be56f
Merge remote-tracking branch 'upstream/main' into typeinterval_part1
Dr-Irv Feb 23, 2022
4fcf523
clean up based on comments. simplify python_parser changes. Introduc…
Dr-Irv Feb 23, 2022
3b68c6a
Merge branch 'typeinterval_part1' into typeinterval_part2
Dr-Irv Feb 23, 2022
d6de171
Merge remote-tracking branch 'upstream/main' into typeinterval_part2
Dr-Irv Feb 27, 2022
9e11b8c
Feedback from twoertwein
Dr-Irv Feb 27, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 141 additions & 0 deletions pandas/_libs/interval.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
from __future__ import annotations

import sys
from typing import (
Any,
Generic,
Protocol,
TypeVar,
Union,
overload,
)

import numpy as np

from pandas._typing import (
Timedelta,
Timestamp,
)

if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal

# NOTE(review): presumably the set of accepted ``closed`` values
# ("left", "right", "both", "neither") -- confirm against the implementation.
VALID_CLOSED: frozenset

# Constrained TypeVars describing the endpoint types an Interval may carry.
# Numeric endpoints only.
OrderableScalarT = TypeVar("OrderableScalarT", int, float)
# Time-like endpoints only.
OrderableTimesT = TypeVar("OrderableTimesT", Timestamp, Timedelta)
# Any supported endpoint type; this is the type parameter of ``Interval``.
OrderableT = TypeVar("OrderableT", int, float, Timestamp, Timedelta)

# Empty Protocol with no members of its own; it serves as the base class of
# ``IntervalMixin`` in this stub.
class IntervalMixinProtocol(Protocol): ...

class _LengthDescriptor:
    # Descriptor type used to annotate ``IntervalMixin.length``: the type of
    # the value read depends on the endpoint type of the accessing Interval.

    # Numeric endpoints (int or float): the length has the endpoint's type.
    @overload
    def __get__(
        self, instance: Interval[OrderableScalarT], owner: Any
    ) -> OrderableScalarT: ...
    # Timestamp or Timedelta endpoints: the length is a Timedelta.
    @overload
    def __get__(
        self, instance: Interval[OrderableTimesT], owner: Any
    ) -> Timedelta: ...

class _MidDescriptor:
    # Descriptor type used to annotate ``IntervalMixin.mid``: the type of the
    # value read depends on the endpoint type of the accessing Interval.

    # Numeric endpoints (int or float): the midpoint is always a float.
    @overload
    def __get__(
        self, instance: Interval[OrderableScalarT], owner: Any
    ) -> float: ...
    # Time-like endpoints: the midpoint has the same type as the endpoints
    # (Timedelta -> Timedelta, Timestamp -> Timestamp).
    @overload
    def __get__(
        self, instance: Interval[OrderableTimesT], owner: Any
    ) -> OrderableTimesT: ...

class IntervalMixin(IntervalMixinProtocol):
    # Typing surface shared by the interval classes declared in this stub
    # (``Interval`` and ``IntervalTree`` both derive from it).

    # Boolean flags for which sides of the interval are closed / open.
    @property
    def closed_left(self) -> bool: ...
    @property
    def closed_right(self) -> bool: ...
    @property
    def open_left(self) -> bool: ...
    @property
    def open_right(self) -> bool: ...
    # ``mid`` and ``length`` are annotated through descriptor classes so their
    # result type can vary with the endpoint type of the accessing Interval.
    mid: _MidDescriptor
    length: _LengthDescriptor
    @property
    def is_empty(self) -> bool: ...
    # Explicit ``-> None`` so the method is not implicitly typed as returning
    # ``Any``; it is a validation helper with no result.
    def _check_closed_matches(
        self, other: IntervalMixin, name: str = ...
    ) -> None: ...

class Interval(IntervalMixin, Generic[OrderableT]):
    # Scalar interval, generic over its endpoint type ``OrderableT``
    # (int, float, Timestamp or Timedelta).

    @property
    def left(self: Interval[OrderableT]) -> OrderableT: ...
    @property
    def right(self: Interval[OrderableT]) -> OrderableT: ...
    # Narrowed from ``str`` to the same literal set ``__init__`` accepts.
    @property
    def closed(self) -> Literal["left", "right", "both", "neither"]: ...
    def __init__(
        self,
        left: OrderableT,
        right: OrderableT,
        closed: Literal["left", "right", "both", "neither"] = ...,
    ) -> None: ...
    def __hash__(self) -> int: ...

    # Membership tests. Time-like intervals accept a key of the same time-like
    # type; numeric intervals accept either int or float keys.
    # The first overload previously declared an unannotated parameter that was
    # literally named ``OrderableTimesT``; it is now ``key: OrderableTimesT``.
    @overload
    def __contains__(
        self: Interval[OrderableTimesT], key: OrderableTimesT
    ) -> bool: ...
    @overload
    def __contains__(self: Interval[int], key: Union[int, float]) -> bool: ...
    @overload
    def __contains__(self: Interval[float], key: Union[int, float]) -> bool: ...
    def __repr__(self) -> str: ...
    def __str__(self) -> str: ...

    # Arithmetic. Shifting (+, -) is typed for time-like intervals with a
    # Timedelta operand and for numeric intervals; an int interval combined
    # with a float operand yields a float interval.
    @overload
    def __add__(
        self: Interval[OrderableTimesT], y: Timedelta
    ) -> Interval[OrderableTimesT]: ...
    @overload
    def __add__(self: Interval[int], y: int) -> Interval[int]: ...
    @overload
    def __add__(self: Interval[int], y: float) -> Interval[float]: ...
    @overload
    def __add__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
    @overload
    def __sub__(
        self: Interval[OrderableTimesT], y: Timedelta
    ) -> Interval[OrderableTimesT]: ...
    @overload
    def __sub__(self: Interval[int], y: int) -> Interval[int]: ...
    @overload
    def __sub__(self: Interval[int], y: float) -> Interval[float]: ...
    @overload
    def __sub__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...

    # Scaling (*, /, //) is only typed for numeric intervals.
    @overload
    def __mul__(self: Interval[int], y: int) -> Interval[int]: ...
    @overload
    def __mul__(self: Interval[int], y: float) -> Interval[float]: ...
    @overload
    def __mul__(self: Interval[float], y: Union[int, float]) -> Interval[float]: ...
    @overload
    def __truediv__(self: Interval[int], y: int) -> Interval[int]: ...
    @overload
    def __truediv__(self: Interval[int], y: float) -> Interval[float]: ...
    @overload
    def __truediv__(
        self: Interval[float], y: Union[int, float]
    ) -> Interval[float]: ...
    @overload
    def __floordiv__(self: Interval[int], y: int) -> Interval[int]: ...
    @overload
    def __floordiv__(self: Interval[int], y: float) -> Interval[float]: ...
    @overload
    def __floordiv__(
        self: Interval[float], y: Union[int, float]
    ) -> Interval[float]: ...

    # Both intervals must share the same endpoint type.
    def overlaps(self: Interval[OrderableT], other: Interval[OrderableT]) -> bool: ...

# Signature stub only; the return type is deliberately left unannotated,
# exactly as in the original declaration.
def intervals_to_interval_bounds(
    intervals: np.ndarray,
    validate_closed: int = ...,
): ...

class IntervalTree(IntervalMixin):
    # Stub for the tree built from arrays of left and right bounds; it is the
    # type returned by ``IntervalIndex._engine``.
    def __init__(
        self,
        left: np.ndarray,
        right: np.ndarray,
        closed: Literal["left", "right", "both", "neither"] = ...,
    ) -> None: ...
    def get_indexer(self, target) -> np.ndarray: ...
    # Yields a pair of arrays (indexer, missing), not a single array, so the
    # annotation is a 2-tuple.
    def get_indexer_non_unique(
        self, target
    ) -> tuple[np.ndarray, np.ndarray]: ...
    _na_count: int
    @property
    def is_overlapping(self) -> bool: ...
2 changes: 1 addition & 1 deletion pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
PythonScalar = Union[str, int, float, bool]
DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
Scalar = Union[PythonScalar, PandasScalar]
Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, datetime]
IntStrT = TypeVar("IntStrT", int, str)


Expand Down
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ def factorize(
else:
dtype = values.dtype
values = _ensure_data(values)
na_value: Scalar
na_value: Scalar | None

if original.dtype.kind in ["m", "M"]:
# Note: factorize_array will cast NaT bc it has a __int__
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
)
import textwrap
from typing import (
Any,
Sequence,
TypeVar,
Union,
Expand Down Expand Up @@ -197,6 +198,11 @@ class IntervalArray(IntervalMixin, ExtensionArray):
can_hold_na = True
_na_value = _fill_value = np.nan

# To make mypy recognize the fields
_left: Any
_right: Any
_dtype: Any

# ---------------------------------------------------------------------
# Constructors

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ def to_numpy(
self,
dtype: npt.DTypeLike | None = None,
copy: bool = False,
na_value: Scalar = lib.no_default,
na_value: Scalar | lib.NoDefault | libmissing.NAType = lib.no_default,
) -> np.ndarray:
"""
Convert to a NumPy Array.
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,7 @@ def _str_replace(
return type(self)(result)

def _str_match(
self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
):
if pa_version_under4p0:
return super()._str_match(pat, case, flags, na)
Expand All @@ -771,7 +771,9 @@ def _str_match(
pat = "^" + pat
return self._str_contains(pat, case, flags, na, regex=True)

def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None):
def _str_fullmatch(
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
):
if pa_version_under4p0:
return super()._str_fullmatch(pat, case, flags, na)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ def f(x):


def convert_to_list_like(
values: Scalar | Iterable | AnyArrayLike,
values: Scalar | Iterable | AnyArrayLike | Hashable,
) -> list | AnyArrayLike:
"""
Convert list-like or scalar input to list-like. List, numpy and pandas array-like
Expand Down
17 changes: 12 additions & 5 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import (
Any,
Hashable,
Literal,
)

import numpy as np
Expand Down Expand Up @@ -191,10 +192,12 @@ class IntervalIndex(ExtensionIndex):
_typ = "intervalindex"

# annotate properties pinned via inherit_names
closed: str
closed: Literal["left", "right", "both", "neither"]
is_non_overlapping_monotonic: bool
closed_left: bool
closed_right: bool
open_left: bool
open_right: bool

_data: IntervalArray
_values: IntervalArray
Expand Down Expand Up @@ -317,9 +320,10 @@ def from_tuples(
return cls._simple_new(arr, name=name)

# --------------------------------------------------------------------

# error: Return type "IntervalTree" of "_engine" incompatible with return type
# "Union[IndexEngine, ExtensionEngine]" in supertype "Index"
@cache_readonly
def _engine(self) -> IntervalTree:
def _engine(self) -> IntervalTree: # type: ignore[override]
left = self._maybe_convert_i8(self.left)
right = self._maybe_convert_i8(self.right)
return IntervalTree(left, right, closed=self.closed)
Expand Down Expand Up @@ -511,7 +515,10 @@ def _maybe_convert_i8(self, key):
left = self._maybe_convert_i8(key.left)
right = self._maybe_convert_i8(key.right)
constructor = Interval if scalar else IntervalIndex.from_arrays
return constructor(left, right, closed=self.closed)
# error: "object" not callable
return constructor(
left, right, closed=self.closed
) # type: ignore[operator]

if scalar:
# Timestamp/Timedelta
Expand Down Expand Up @@ -543,7 +550,7 @@ def _maybe_convert_i8(self, key):

return key_i8

def _searchsorted_monotonic(self, label, side: str = "left"):
def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
if not self.is_non_overlapping_monotonic:
raise KeyError(
"can only get slices from an IntervalIndex if bounds are "
Expand Down
15 changes: 12 additions & 3 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
Hashable,
List,
Tuple,
TypeVar,
Union,
cast,
overload,
)
import warnings
Expand Down Expand Up @@ -66,6 +66,7 @@
)
from pandas.core import algorithms
from pandas.core.algorithms import unique
from pandas.core.arrays.base import ExtensionArray
from pandas.core.arrays.datetimes import (
maybe_convert_dtype,
objects_to_datetime64ns,
Expand All @@ -85,7 +86,8 @@

ArrayConvertible = Union[List, Tuple, AnyArrayLike, "Series"]
Scalar = Union[int, float, str]
DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
DatetimeScalar = Union[Scalar, datetime]

DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible]
start_caching_at = 50

Expand Down Expand Up @@ -638,7 +640,7 @@ def to_datetime(
infer_datetime_format: bool = ...,
origin=...,
cache: bool = ...,
) -> DatetimeScalar | NaTType:
) -> Timestamp | NaTType:
...


Expand Down Expand Up @@ -1061,6 +1063,13 @@ def to_datetime(
result = convert_listlike(arg, format, name=arg.name)
elif is_list_like(arg):
try:
# error: Argument 1 to "_maybe_cache" has incompatible type
# "Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray,
# ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...],
# Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]"
arg = cast(
Union[list, tuple, ExtensionArray, np.ndarray, "Series", Index], arg
)
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
except OutOfBoundsDatetime:
# caching attempts to create a DatetimeIndex, which may raise
Expand Down
21 changes: 14 additions & 7 deletions pandas/io/excel/_odfreader.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from __future__ import annotations

from typing import cast

import numpy as np

from pandas._libs.tslibs.nattype import NaTType
from pandas._typing import (
FilePath,
ReadBuffer,
Expand Down Expand Up @@ -81,7 +84,9 @@ def get_sheet_by_name(self, name: str):
self.close()
raise ValueError(f"sheet {name} not found")

def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
def get_sheet_data(
self, sheet, convert_float: bool
) -> list[list[Scalar | NaTType]]:
"""
Parse an ODF Table into a list of lists
"""
Expand All @@ -99,12 +104,12 @@ def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
empty_rows = 0
max_row_len = 0

table: list[list[Scalar]] = []
table: list[list[Scalar | NaTType]] = []

for sheet_row in sheet_rows:
sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
empty_cells = 0
table_row: list[Scalar] = []
table_row: list[Scalar | NaTType] = []

for sheet_cell in sheet_cells:
if sheet_cell.qname == table_cell_name:
Expand Down Expand Up @@ -167,7 +172,7 @@ def _is_empty_row(self, row) -> bool:

return True

def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType:
from odf.namespaces import OFFICENS

if str(cell) == "#N/A":
Expand Down Expand Up @@ -200,9 +205,11 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
cell_value = cell.attributes.get((OFFICENS, "date-value"))
return pd.to_datetime(cell_value)
elif cell_type == "time":
stamp = pd.to_datetime(str(cell))
# error: Item "str" of "Union[float, str, NaTType]" has no attribute "time"
return stamp.time() # type: ignore[union-attr]
# cast needed because `pd.to_datetime` can return NaTType,
# but we know this is a valid time
stamp = cast(pd.Timestamp, pd.to_datetime(str(cell)))
# cast needed here because Scalar doesn't include datetime.time
return cast(Scalar, stamp.time())
else:
self.close()
raise ValueError(f"Unrecognized type {cell_type}")
Expand Down
Loading