Skip to content

TYP: libinterval #41059

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions pandas/_libs/interval.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import numpy as np

from pandas import (
Timedelta,
Timestamp,
)

VALID_CLOSED: frozenset[str]


class IntervalMixin:
    # Typing stub for the interval mixin.  Within this stub it is the base
    # class of both ``Interval`` and ``IntervalTree``.

    # Name of the closed side(s) of the interval.
    # NOTE(review): presumably one of the members of ``VALID_CLOSED`` --
    # confirm against the Cython implementation.
    closed: str

    # --- endpoint flags: all four are read-only boolean properties ---
    @property
    def closed_left(self) -> bool: ...

    @property
    def closed_right(self) -> bool: ...

    @property
    def open_left(self) -> bool: ...

    @property
    def open_right(self) -> bool: ...

    # --- derived properties -------------------------------------------
    # No return type is declared for the next three properties in this
    # stub, so type checkers treat their values as ``Any``.
    @property
    def mid(self): ...

    @property
    def length(self): ...

    @property
    def is_empty(self): ...

    # Declared to return None; ``name`` is optional (default elided in the
    # stub).  NOTE(review): presumably raises when ``other.closed`` differs
    # from ``self.closed`` -- the stub itself cannot show this.
    def _check_closed_matches(self, other, name: str = ...) -> None: ...


class Interval(IntervalMixin):
    # Typing stub for the scalar interval type.

    # Both endpoints are declared with the same union of scalar types.
    left: int | float | Timestamp | Timedelta
    right: int | float | Timestamp | Timedelta

    # ``closed`` is optional; its default value is elided in the stub.
    def __init__(self, left, right, closed: str = ...): ...

    # --- container / string protocol ---
    def __contains__(self, key) -> bool: ...
    def __str__(self) -> str: ...

    # --- arithmetic operators ---
    # Operand and result types are not declared in this stub.
    def __add__(self, y): ...
    def __sub__(self, y): ...
    def __mul__(self, y): ...
    def __truediv__(self, y): ...
    def __floordiv__(self, y): ...

    # Takes another ``Interval`` and is declared to return a bool.
    def overlaps(self, other: Interval) -> bool: ...


# Stub signature: takes an ndarray of intervals plus an optional
# ``validate_closed`` flag (default elided) and is declared to return a
# 3-tuple ``(ndarray, ndarray, str)``.
# NOTE(review): presumably the left bounds, the right bounds and the shared
# ``closed`` value -- confirm against the Cython implementation.
def intervals_to_interval_bounds(
    intervals: np.ndarray,
    validate_closed: bool = ...,
) -> tuple[np.ndarray, np.ndarray, str]: ...


class IntervalTree(IntervalMixin):
    # Typing stub for the interval tree.

    # ``closed`` is annotated as ``str`` to agree with ``Interval.__init__``
    # in this stub.  The explicit annotations and ``-> None`` also make this
    # a typed def, so type checkers check calls to the constructor.
    # ``left`` / ``right`` stay unannotated: their accepted types are not
    # established in this stub.
    def __init__(
        self,
        left,
        right,
        closed: str = ...,
        leaf_size: int = ...,
    ) -> None: ...

    @property
    def left_sorter(self) -> np.ndarray: ...  # np.ndarray[np.intp]

    @property
    def right_sorter(self) -> np.ndarray: ...  # np.ndarray[np.intp]

    @property
    def is_overlapping(self) -> bool: ...

    @property
    def is_monotonic_increasing(self) -> bool: ...

    # Declared to return a single ndarray for the given target array.
    def get_indexer(
        self,
        target: np.ndarray,  # scalar_t[:]
    ) -> np.ndarray: ...  # np.ndarray[np.intp]

    # Declared to return a pair of ndarrays for the given target array.
    def get_indexer_non_unique(
        self,
        target: np.ndarray,  # scalar_t[:]
    ) -> tuple[
        np.ndarray,  # np.ndarray[np.intp]
        np.ndarray,  # np.ndarray[np.intp]
    ]: ...

    def clear_mapping(self) -> None: ...
6 changes: 4 additions & 2 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from datetime import (
date,
datetime,
time,
timedelta,
tzinfo,
)
Expand Down Expand Up @@ -86,10 +88,10 @@

# scalars

PythonScalar = Union[str, int, float, bool]
PythonScalar = Optional[Union[str, int, float, complex, bool, date, time, timedelta]]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of Optional, can you use type(None) to be more explicit?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should just do a pass to modernize this entire file

DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
Scalar = Union[PythonScalar, PandasScalar]
Scalar = Union[PythonScalar, PandasScalar, np.number, np.datetime64, np.timedelta64]

# timestamp and timedelta convertible types

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,10 @@ def contains(cat, key, container):
return any(loc_ in container for loc_ in loc)


class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin):
# error: Cannot determine type of 'repeat' in base class 'ExtensionArray'
class Categorical( # type: ignore[misc]
NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMixin
):
"""
Represent a categorical variable in classic R / S-plus fashion.

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,10 @@ class InvalidComparison(Exception):
pass


class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray):
# error: Cannot determine type of 'repeat' in base class 'ExtensionArray'
class DatetimeLikeArrayMixin( # type: ignore[misc]
OpsMixin, NDArrayBackedExtensionArray
):
"""
Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ def f(self):
return property(f)


class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
# error: Cannot determine type of 'repeat' in base class 'ExtensionArray'
class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): # type: ignore[misc]
"""
Pandas ExtensionArray for tz-naive or tz-aware datetime data.

Expand Down
50 changes: 42 additions & 8 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
)
import textwrap
from typing import (
TYPE_CHECKING,
Generic,
Sequence,
TypeVar,
cast,
Expand Down Expand Up @@ -84,7 +86,14 @@
unpack_zerodim_and_defer,
)

if TYPE_CHECKING:
from pandas.core.arrays import (
DatetimeArray,
TimedeltaArray,
)

IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray")
S = TypeVar("S", np.ndarray, "DatetimeArray", "TimedeltaArray")

_interval_shared_docs: dict[str, str] = {}

Expand Down Expand Up @@ -186,11 +195,15 @@
),
}
)
class IntervalArray(IntervalMixin, ExtensionArray):
class IntervalArray(IntervalMixin, ExtensionArray, Generic[S]):
ndim = 1
can_hold_na = True
_na_value = _fill_value = np.nan

_dtype: IntervalDtype
_left: S
_right: S

# ---------------------------------------------------------------------
# Constructors

Expand Down Expand Up @@ -586,7 +599,12 @@ def _validate(self):
"location both left and right sides"
)
raise ValueError(msg)
if not (self._left[left_mask] <= self._right[left_mask]).all():
# error: Item "bool" of "Union[Any, bool]" has no attribute "all"
if not ( # type: ignore[union-attr]
# error: Unsupported operand types for <= ("Timestamp" and "Timedelta")
self._left[left_mask] # type: ignore[operator]
<= self._right[left_mask]
).all():
msg = "left side of interval must be <= right side"
raise ValueError(msg)

Expand Down Expand Up @@ -930,9 +948,9 @@ def shift(
from pandas import Index

fill_value = Index(self._left, copy=False)._na_value
empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1))
empty = type(self).from_breaks([fill_value] * (empty_len + 1))
else:
empty = self._from_sequence([fill_value] * empty_len)
empty = type(self)._from_sequence([fill_value] * empty_len)

if periods > 0:
a = empty
Expand Down Expand Up @@ -1355,15 +1373,31 @@ def is_non_overlapping_monotonic(self) -> bool:
# at a point when both sides of intervals are included
if self.closed == "both":
return bool(
(self._right[:-1] < self._left[1:]).all()
or (self._left[:-1] > self._right[1:]).all()
# error: Item "bool" of "Union[Any, bool]" has no attribute "all"
# error: Unsupported operand types for > ("Timedelta" and "Timestamp")
( # type: ignore[union-attr]
self._right[:-1] < self._left[1:] # type: ignore[operator]
).all()
# error: Item "bool" of "Union[Any, bool]" has no attribute "all"
# error: Unsupported operand types for > ("Timedelta" and "Timestamp")
or ( # type: ignore[union-attr]
self._left[:-1] > self._right[1:] # type: ignore[operator]
).all()
)

# non-strict inequality when closed != 'both'; at least one side is
# not included in the intervals, so equality does not imply overlapping
return bool(
(self._right[:-1] <= self._left[1:]).all()
or (self._left[:-1] >= self._right[1:]).all()
# error: Item "bool" of "Union[Any, bool]" has no attribute "all"
# error: Unsupported operand types for <= ("Timestamp" and "Timedelta")
( # type: ignore[union-attr]
self._right[:-1] <= self._left[1:] # type: ignore[operator]
).all()
# error: Item "bool" of "Union[Any, bool]" has no attribute "all"
# error: Unsupported operand types for >= ("Timedelta" and "Timestamp")
or ( # type: ignore[union-attr]
self._left[:-1] >= self._right[1:] # type: ignore[operator]
).all()
)

# ---------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def to_numpy( # type: ignore[override]
self,
dtype: NpDtype | None = None,
copy: bool = False,
na_value: Scalar = lib.no_default,
na_value: Scalar | lib.NoDefault = lib.no_default,
) -> np.ndarray:
"""
Convert to a NumPy Array.
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
from pandas.core.strings.object_array import ObjectStringArrayMixin


class PandasArray(
# error: Cannot determine type of 'repeat' in base class 'ExtensionArray'
class PandasArray( # type: ignore[misc]
OpsMixin,
NDArrayBackedExtensionArray,
NDArrayOperatorsMixin,
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ class BaseStringArray(ExtensionArray):
pass


class StringArray(BaseStringArray, PandasArray):
# error: Cannot determine type of 'repeat' in base class 'ExtensionArray'
class StringArray(BaseStringArray, PandasArray): # type: ignore[misc]
"""
Extension array for string data.

Expand Down Expand Up @@ -311,8 +312,6 @@ def __init__(self, values, copy=False):
values = extract_array(values)

super().__init__(values, copy=copy)
# error: Incompatible types in assignment (expression has type "StringDtype",
# variable has type "PandasDtype")
NDArrayBacked.__init__(self, self._ndarray, StringDtype(storage="python"))
if not isinstance(values, type(self)):
self._validate()
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1101,7 +1101,8 @@ def _memory_usage(self, deep: bool = False) -> int:
return v

@doc(
algorithms.factorize,
# error: Cannot determine type of 'factorize'
algorithms.factorize, # type: ignore[has-type]
values="",
order="",
size_hint="",
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from pandas._typing import (
AnyArrayLike,
NpDtype,
Scalar,
T,
)
from pandas.compat import np_version_under1p18
Expand Down Expand Up @@ -487,7 +486,7 @@ def f(x):


def convert_to_list_like(
values: Scalar | Iterable | AnyArrayLike,
values: Any,
) -> list | AnyArrayLike:
"""
Convert list-like or scalar input to list-like. List, numpy and pandas array-like
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/computation/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,12 @@ def __repr__(self) -> str:

def evaluate(self):
"""create and return the numexpr condition and filter"""
if self.terms is None:
raise ValueError(
f"cannot process expression [{self.expr}], [{self}] "
"is not a valid condition"
)

try:
self.condition = self.terms.prune(ConditionBinOp)
except AttributeError as err:
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,11 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
# GH#36541: can't fill array directly with pd.NaT
# > np.empty(10, dtype="datetime64[64]").fill(pd.NaT)
# ValueError: cannot convert float NaN to integer
value = dtype.type("NaT", "ns")
# error: Incompatible types in assignment (expression has type
# "Union[generic, Any]", variable has type "Union[Union[str, int, float,
# complex, bool, date, time, timedelta, None], Union[Period, Timestamp,
# Timedelta, Interval], number[Any], datetime64, timedelta64]")
value = dtype.type("NaT", "ns") # type: ignore[assignment]
elif isinstance(value, Timestamp):
if value.tz is None:
value = value.to_datetime64()
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
)
from pandas.errors import InvalidIndexError
from pandas.util._decorators import (
Appender,
cache_readonly,
doc,
)
Expand Down Expand Up @@ -263,7 +264,8 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
# --------------------------------------------------------------------
# methods that dispatch to DatetimeArray and wrap result

@doc(DatetimeArray.strftime)
# error: Cannot determine type of 'strftime'
@Appender(DatetimeArray.strftime.__doc__) # type: ignore[has-type]
def strftime(self, date_format) -> Index:
arr = self._data.strftime(date_format)
return Index(arr, name=self.name)
Expand All @@ -273,12 +275,14 @@ def tz_convert(self, tz) -> DatetimeIndex:
arr = self._data.tz_convert(tz)
return type(self)._simple_new(arr, name=self.name)

@doc(DatetimeArray.tz_localize)
# error: Cannot determine type of 'tz_localize'
@doc(DatetimeArray.tz_localize) # type: ignore[has-type]
def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeIndex:
arr = self._data.tz_localize(tz, ambiguous, nonexistent)
return type(self)._simple_new(arr, name=self.name)

@doc(DatetimeArray.to_period)
# error: Cannot determine type of 'to_period'
@doc(DatetimeArray.to_period) # type: ignore[has-type]
def to_period(self, freq=None) -> PeriodIndex:
from pandas.core.indexes.api import PeriodIndex

Expand Down
9 changes: 7 additions & 2 deletions pandas/core/indexes/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,9 @@ def __getitem__(self, key):
return type(self)(result, name=self._name)
# Unpack to ndarray for MPL compat

result = result._ndarray
# error: Item "IntervalArray" of "Union[Any, IntervalArray,
# NDArrayBackedExtensionArray]" has no attribute "_ndarray"
result = result._ndarray # type: ignore[union-attr]

# Includes cases where we get a 2D ndarray back for MPL compat
deprecate_ndim_indexing(result)
Expand Down Expand Up @@ -438,8 +440,11 @@ class NDArrayBackedExtensionIndex(ExtensionIndex):

_data: NDArrayBackedExtensionArray

# Argument 1 of "_simple_new" is incompatible with supertype "ExtensionIndex";
# supertype defines the argument type as
# "Union[IntervalArray, NDArrayBackedExtensionArray]"
@classmethod
def _simple_new(
def _simple_new( # type: ignore[override]
cls,
values: NDArrayBackedExtensionArray,
name: Hashable = None,
Expand Down
Loading