Skip to content

Commit 93ba57a

Browse files
authored
Typeinterval part1 (#46080)
1 parent 7ee8ab0 commit 93ba57a

File tree

12 files changed

+79
-34
lines changed

12 files changed

+79
-34
lines changed

pandas/_typing.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@
8383
PythonScalar = Union[str, int, float, bool]
8484
DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
8585
PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
86-
Scalar = Union[PythonScalar, PandasScalar]
86+
Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, datetime]
8787
IntStrT = TypeVar("IntStrT", int, str)
8888

8989

@@ -304,3 +304,7 @@ def closed(self) -> bool:
304304

305305
# read_xml parsers
306306
XMLParsers = Literal["lxml", "etree"]
307+
308+
# Interval closed type
309+
310+
IntervalClosedType = Literal["left", "right", "both", "neither"]

pandas/core/algorithms.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,7 @@ def factorize(
759759
else:
760760
dtype = values.dtype
761761
values = _ensure_data(values)
762-
na_value: Scalar
762+
na_value: Scalar | None
763763

764764
if original.dtype.kind in ["m", "M"]:
765765
# Note: factorize_array will cast NaT bc it has a __int__

pandas/core/arrays/interval.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from pandas._typing import (
3030
ArrayLike,
3131
Dtype,
32+
IntervalClosedType,
3233
NpDtype,
3334
PositionalIndexer,
3435
ScalarIndexer,
@@ -200,6 +201,11 @@ class IntervalArray(IntervalMixin, ExtensionArray):
200201
can_hold_na = True
201202
_na_value = _fill_value = np.nan
202203

204+
# To make mypy recognize the fields
205+
_left: np.ndarray
206+
_right: np.ndarray
207+
_dtype: IntervalDtype
208+
203209
# ---------------------------------------------------------------------
204210
# Constructors
205211

@@ -660,11 +666,7 @@ def __getitem__(
660666
if is_scalar(left) and isna(left):
661667
return self._fill_value
662668
return Interval(left, right, self.closed)
663-
# error: Argument 1 to "ndim" has incompatible type "Union[ndarray,
664-
# ExtensionArray]"; expected "Union[Union[int, float, complex, str, bytes,
665-
# generic], Sequence[Union[int, float, complex, str, bytes, generic]],
666-
# Sequence[Sequence[Any]], _SupportsArray]"
667-
if np.ndim(left) > 1: # type: ignore[arg-type]
669+
if np.ndim(left) > 1:
668670
# GH#30588 multi-dimensional indexer disallowed
669671
raise ValueError("multi-dimensional indexing not allowed")
670672
return self._shallow_copy(left, right)
@@ -1368,7 +1370,7 @@ def closed(self):
13681370
),
13691371
}
13701372
)
1371-
def set_closed(self: IntervalArrayT, closed) -> IntervalArrayT:
1373+
def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT:
13721374
if closed not in VALID_CLOSED:
13731375
msg = f"invalid option for 'closed': {closed}"
13741376
raise ValueError(msg)
@@ -1669,8 +1671,14 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray:
16691671

16701672
dtype = self._left.dtype
16711673
if needs_i8_conversion(dtype):
1672-
new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype)
1673-
new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype)
1674+
# error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
1675+
new_left = type(self._left)._from_sequence( # type: ignore[attr-defined]
1676+
nc[:, 0], dtype=dtype
1677+
)
1678+
# error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
1679+
new_right = type(self._right)._from_sequence( # type: ignore[attr-defined]
1680+
nc[:, 1], dtype=dtype
1681+
)
16741682
else:
16751683
new_left = nc[:, 0].view(dtype)
16761684
new_right = nc[:, 1].view(dtype)

pandas/core/arrays/masked.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ def to_numpy(
334334
self,
335335
dtype: npt.DTypeLike | None = None,
336336
copy: bool = False,
337-
na_value: Scalar = lib.no_default,
337+
na_value: Scalar | lib.NoDefault | libmissing.NAType = lib.no_default,
338338
) -> np.ndarray:
339339
"""
340340
Convert to a NumPy Array.

pandas/core/arrays/string_arrow.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -762,7 +762,7 @@ def _str_replace(
762762
return type(self)(result)
763763

764764
def _str_match(
765-
self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
765+
self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
766766
):
767767
if pa_version_under4p0:
768768
return super()._str_match(pat, case, flags, na)
@@ -771,7 +771,9 @@ def _str_match(
771771
pat = "^" + pat
772772
return self._str_contains(pat, case, flags, na, regex=True)
773773

774-
def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None):
774+
def _str_fullmatch(
775+
self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
776+
):
775777
if pa_version_under4p0:
776778
return super()._str_fullmatch(pat, case, flags, na)
777779

pandas/core/common.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
ArrayLike,
3636
NpDtype,
3737
RandomState,
38-
Scalar,
3938
T,
4039
)
4140
from pandas.util._exceptions import find_stack_level
@@ -517,7 +516,7 @@ def f(x):
517516

518517

519518
def convert_to_list_like(
520-
values: Scalar | Iterable | AnyArrayLike,
519+
values: Hashable | Iterable | AnyArrayLike,
521520
) -> list | AnyArrayLike:
522521
"""
523522
Convert list-like or scalar input to list-like. List, numpy and pandas array-like

pandas/core/generic.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
FilePath,
4747
IndexKeyFunc,
4848
IndexLabel,
49+
IntervalClosedType,
4950
JSONSerializable,
5051
Level,
5152
Manager,
@@ -7776,7 +7777,7 @@ def between_time(
77767777
end_time,
77777778
include_start: bool_t | lib.NoDefault = lib.no_default,
77787779
include_end: bool_t | lib.NoDefault = lib.no_default,
7779-
inclusive: str | None = None,
7780+
inclusive: IntervalClosedType | None = None,
77807781
axis=None,
77817782
) -> NDFrameT:
77827783
"""
@@ -7881,7 +7882,7 @@ def between_time(
78817882
left = True if isinstance(include_start, lib.NoDefault) else include_start
78827883
right = True if isinstance(include_end, lib.NoDefault) else include_end
78837884

7884-
inc_dict = {
7885+
inc_dict: dict[tuple[bool_t, bool_t], IntervalClosedType] = {
78857886
(True, True): "both",
78867887
(True, False): "left",
78877888
(False, True): "right",

pandas/core/indexes/datetimes.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from pandas._typing import (
3434
Dtype,
3535
DtypeObj,
36+
IntervalClosedType,
3637
npt,
3738
)
3839
from pandas.util._decorators import (
@@ -884,8 +885,8 @@ def date_range(
884885
tz=None,
885886
normalize: bool = False,
886887
name: Hashable = None,
887-
closed: str | None | lib.NoDefault = lib.no_default,
888-
inclusive: str | None = None,
888+
closed: Literal["left", "right"] | None | lib.NoDefault = lib.no_default,
889+
inclusive: IntervalClosedType | None = None,
889890
**kwargs,
890891
) -> DatetimeIndex:
891892
"""
@@ -1091,7 +1092,7 @@ def bdate_range(
10911092
weekmask=None,
10921093
holidays=None,
10931094
closed: lib.NoDefault = lib.no_default,
1094-
inclusive: str | None = None,
1095+
inclusive: IntervalClosedType | None = None,
10951096
**kwargs,
10961097
) -> DatetimeIndex:
10971098
"""

pandas/core/indexes/interval.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from typing import (
1010
Any,
1111
Hashable,
12+
Literal,
1213
)
1314

1415
import numpy as np
@@ -28,6 +29,7 @@
2829
from pandas._typing import (
2930
Dtype,
3031
DtypeObj,
32+
IntervalClosedType,
3133
npt,
3234
)
3335
from pandas.errors import InvalidIndexError
@@ -191,10 +193,12 @@ class IntervalIndex(ExtensionIndex):
191193
_typ = "intervalindex"
192194

193195
# annotate properties pinned via inherit_names
194-
closed: str
196+
closed: IntervalClosedType
195197
is_non_overlapping_monotonic: bool
196198
closed_left: bool
197199
closed_right: bool
200+
open_left: bool
201+
open_right: bool
198202

199203
_data: IntervalArray
200204
_values: IntervalArray
@@ -543,7 +547,7 @@ def _maybe_convert_i8(self, key):
543547

544548
return key_i8
545549

546-
def _searchsorted_monotonic(self, label, side: str = "left"):
550+
def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
547551
if not self.is_non_overlapping_monotonic:
548552
raise KeyError(
549553
"can only get slices from an IntervalIndex if bounds are "
@@ -941,7 +945,12 @@ def _is_type_compatible(a, b) -> bool:
941945

942946

943947
def interval_range(
944-
start=None, end=None, periods=None, freq=None, name: Hashable = None, closed="right"
948+
start=None,
949+
end=None,
950+
periods=None,
951+
freq=None,
952+
name: Hashable = None,
953+
closed: IntervalClosedType = "right",
945954
) -> IntervalIndex:
946955
"""
947956
Return a fixed frequency IntervalIndex.

pandas/core/tools/datetimes.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
Hashable,
1111
List,
1212
Tuple,
13-
TypeVar,
1413
Union,
14+
cast,
1515
overload,
1616
)
1717
import warnings
@@ -66,6 +66,7 @@
6666
)
6767
from pandas.core import algorithms
6868
from pandas.core.algorithms import unique
69+
from pandas.core.arrays.base import ExtensionArray
6970
from pandas.core.arrays.datetimes import (
7071
maybe_convert_dtype,
7172
objects_to_datetime64ns,
@@ -85,7 +86,8 @@
8586

8687
ArrayConvertible = Union[List, Tuple, AnyArrayLike, "Series"]
8788
Scalar = Union[int, float, str]
88-
DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
89+
DatetimeScalar = Union[Scalar, datetime]
90+
8991
DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible]
9092
start_caching_at = 50
9193

@@ -638,7 +640,7 @@ def to_datetime(
638640
infer_datetime_format: bool = ...,
639641
origin=...,
640642
cache: bool = ...,
641-
) -> DatetimeScalar | NaTType:
643+
) -> Timestamp | NaTType:
642644
...
643645

644646

@@ -1061,6 +1063,13 @@ def to_datetime(
10611063
result = convert_listlike(arg, format, name=arg.name)
10621064
elif is_list_like(arg):
10631065
try:
1066+
# error: Argument 1 to "_maybe_cache" has incompatible type
1067+
# "Union[float, str, datetime, List[Any], Tuple[Any, ...], ExtensionArray,
1068+
# ndarray[Any, Any], Series]"; expected "Union[List[Any], Tuple[Any, ...],
1069+
# Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series], Series]"
1070+
arg = cast(
1071+
Union[list, tuple, ExtensionArray, np.ndarray, "Series", Index], arg
1072+
)
10641073
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
10651074
except OutOfBoundsDatetime:
10661075
# caching attempts to create a DatetimeIndex, which may raise

pandas/io/excel/_odfreader.py

+19-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
from __future__ import annotations
22

3+
from typing import (
4+
TYPE_CHECKING,
5+
cast,
6+
)
7+
38
import numpy as np
49

510
from pandas._typing import (
@@ -16,6 +21,9 @@
1621

1722
from pandas.io.excel._base import BaseExcelReader
1823

24+
if TYPE_CHECKING:
25+
from pandas._libs.tslibs.nattype import NaTType
26+
1927

2028
@doc(storage_options=_shared_docs["storage_options"])
2129
class ODFReader(BaseExcelReader):
@@ -81,7 +89,9 @@ def get_sheet_by_name(self, name: str):
8189
self.close()
8290
raise ValueError(f"sheet {name} not found")
8391

84-
def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
92+
def get_sheet_data(
93+
self, sheet, convert_float: bool
94+
) -> list[list[Scalar | NaTType]]:
8595
"""
8696
Parse an ODF Table into a list of lists
8797
"""
@@ -99,12 +109,12 @@ def get_sheet_data(self, sheet, convert_float: bool) -> list[list[Scalar]]:
99109
empty_rows = 0
100110
max_row_len = 0
101111

102-
table: list[list[Scalar]] = []
112+
table: list[list[Scalar | NaTType]] = []
103113

104114
for sheet_row in sheet_rows:
105115
sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
106116
empty_cells = 0
107-
table_row: list[Scalar] = []
117+
table_row: list[Scalar | NaTType] = []
108118

109119
for sheet_cell in sheet_cells:
110120
if sheet_cell.qname == table_cell_name:
@@ -167,7 +177,7 @@ def _is_empty_row(self, row) -> bool:
167177

168178
return True
169179

170-
def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
180+
def _get_cell_value(self, cell, convert_float: bool) -> Scalar | NaTType:
171181
from odf.namespaces import OFFICENS
172182

173183
if str(cell) == "#N/A":
@@ -200,9 +210,11 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
200210
cell_value = cell.attributes.get((OFFICENS, "date-value"))
201211
return pd.to_datetime(cell_value)
202212
elif cell_type == "time":
203-
stamp = pd.to_datetime(str(cell))
204-
# error: Item "str" of "Union[float, str, NaTType]" has no attribute "time"
205-
return stamp.time() # type: ignore[union-attr]
213+
# cast needed because `pd.to_datetime can return NaTType,
214+
# but we know this is a valid time
215+
stamp = cast(pd.Timestamp, pd.to_datetime(str(cell)))
216+
# cast needed here because Scalar doesn't include datetime.time
217+
return cast(Scalar, stamp.time())
206218
else:
207219
self.close()
208220
raise ValueError(f"Unrecognized type {cell_type}")

pandas/io/parsers/python_parser.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -892,7 +892,7 @@ def _clear_buffer(self) -> None:
892892

893893
def _get_index_name(
894894
self, columns: list[Hashable]
895-
) -> tuple[list[Hashable] | None, list[Hashable], list[Hashable]]:
895+
) -> tuple[Sequence[Hashable] | None, list[Hashable], list[Hashable]]:
896896
"""
897897
Try several cases to get lines:
898898

0 commit comments

Comments
 (0)