Skip to content

TYP: Typing for ExtensionArray.__getitem__ #41258

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Sep 8, 2021
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
6c83511
TYP: ExtensionArray.__getitem__
Dr-Irv May 1, 2021
0c8d648
make base class use PositionalIndexer2D
Dr-Irv May 2, 2021
c43eadc
merge with master
Dr-Irv May 16, 2021
94ced76
fix up getitem typing for DateTimeOps
Dr-Irv May 16, 2021
4772f8e
Merge remote-tracking branch 'upstream/master' into extgetitem
Dr-Irv May 23, 2021
2773c8b
Make getitem on EA accept 1D, and change declaration for 2D arrays
Dr-Irv May 23, 2021
b7f2485
Merge branch 'master' into extgetitem
Dr-Irv May 31, 2021
01c0cf5
casting in datetimelike, allow NA in string arrow
Dr-Irv May 31, 2021
3b38de2
fix string arrow NA type
Dr-Irv May 31, 2021
b25d5a3
Merge remote-tracking branch 'upstream/master' into extgetitem
Dr-Irv Jun 13, 2021
d7c545d
change an overload in mixins to use NDArrayBackedExtensionArrayT
Dr-Irv Jun 13, 2021
076e434
categorical returns Any, interval for NA, put back libmissing in stri…
Dr-Irv Jun 14, 2021
01c1a3f
Merge remote-tracking branch 'upstream/master' into extgetitem
Dr-Irv Jul 6, 2021
738ec89
merge with master
Dr-Irv Jul 8, 2021
c5e300c
Merge remote-tracking branch 'upstream/master' into extgetitem
Dr-Irv Jul 8, 2021
1dbb668
change ignore messages
Dr-Irv Jul 8, 2021
3e19841
Merge remote-tracking branch 'upstream/master' into extgetitem
Dr-Irv Jul 14, 2021
73680ac
WIP: merge with master
Dr-Irv Jul 26, 2021
adf3a73
resolve conflicts in core/internals/blocks.py
Dr-Irv Jul 26, 2021
6068976
Merge remote-tracking branch 'upstream/master' into extgetitem
Dr-Irv Jul 28, 2021
0f36e5f
merge with master 0803
Dr-Irv Aug 3, 2021
9557cac
merge with master
Dr-Irv Sep 6, 2021
9a8550d
create types for split of getitem arguments
Dr-Irv Sep 6, 2021
10c454d
merge in delete/searchsorted typing changes
Dr-Irv Sep 6, 2021
a5318bc
merge with astype changes
Dr-Irv Sep 6, 2021
b941732
comments on various indexers in _typing.py
Dr-Irv Sep 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,9 @@
# TODO: add Ellipsis, see
# https://github.com/python/typing/issues/684#issuecomment-548203158
# https://bugs.python.org/issue41810
PositionalIndexer = Union[int, np.integer, slice, Sequence[int], np.ndarray]
PositionalIndexer2D = Union[
PositionalIndexer, Tuple[PositionalIndexer, PositionalIndexer]
]
# Using List[int] here rather than Sequence[int] to disallow tuples.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you follow up and give some hints on where these should be used (e.g. a line of description for each, with an example)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you follow up and give some hints on where these should be used (e.g. a line of description for each, with an example)?

Done in a new commit - added comments - let me know if you want more

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it, looks good.

ScalarIndexer = Union[int, np.integer]
SequenceIndexer = Union[slice, List[int], np.ndarray]
PositionalIndexer = Union[ScalarIndexer, SequenceIndexer]
PositionalIndexerTuple = Tuple[PositionalIndexer, PositionalIndexer]
PositionalIndexer2D = Union[PositionalIndexer, PositionalIndexerTuple]
17 changes: 16 additions & 1 deletion pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
from typing import (
TYPE_CHECKING,
Any,
Literal,
Sequence,
TypeVar,
cast,
overload,
)

import numpy as np
Expand All @@ -16,6 +18,9 @@
from pandas._typing import (
F,
PositionalIndexer2D,
PositionalIndexerTuple,
ScalarIndexer,
SequenceIndexer,
Shape,
npt,
type_t,
Expand Down Expand Up @@ -48,7 +53,6 @@
)

if TYPE_CHECKING:
from typing import Literal

from pandas._typing import (
NumpySorter,
Expand Down Expand Up @@ -205,6 +209,17 @@ def __setitem__(self, key, value):
def _validate_setitem_value(self, value):
return value

@overload
def __getitem__(self, key: ScalarIndexer) -> Any:
...

@overload
def __getitem__(
self: NDArrayBackedExtensionArrayT,
key: SequenceIndexer | PositionalIndexerTuple,
) -> NDArrayBackedExtensionArrayT:
...

def __getitem__(
self: NDArrayBackedExtensionArrayT,
key: PositionalIndexer2D,
Expand Down
17 changes: 15 additions & 2 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
Dtype,
FillnaOptions,
PositionalIndexer,
ScalarIndexer,
SequenceIndexer,
Shape,
npt,
)
Expand Down Expand Up @@ -298,8 +300,17 @@ def _from_factorized(cls, values, original):
# ------------------------------------------------------------------------
# Must be a Sequence
# ------------------------------------------------------------------------
@overload
def __getitem__(self, item: ScalarIndexer) -> Any:
...

@overload
def __getitem__(self: ExtensionArrayT, item: SequenceIndexer) -> ExtensionArrayT:
...

def __getitem__(self, item: PositionalIndexer) -> ExtensionArray | Any:
def __getitem__(
self: ExtensionArrayT, item: PositionalIndexer
) -> ExtensionArrayT | Any:
"""
Select a subset of self.

Expand All @@ -313,6 +324,8 @@ def __getitem__(self, item: PositionalIndexer) -> ExtensionArray | Any:

* ndarray: A 1-d boolean NumPy ndarray the same length as 'self'

* list[int]: A list of int

Returns
-------
item : scalar or ExtensionArray
Expand Down Expand Up @@ -761,7 +774,7 @@ def fillna(
new_values = self.copy()
return new_values

def dropna(self):
def dropna(self: ExtensionArrayT) -> ExtensionArrayT:
"""
Return ExtensionArray without NA values.

Expand Down
18 changes: 17 additions & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from shutil import get_terminal_size
from typing import (
TYPE_CHECKING,
Any,
Hashable,
Sequence,
TypeVar,
Expand Down Expand Up @@ -37,7 +38,11 @@
Dtype,
NpDtype,
Ordered,
PositionalIndexer2D,
PositionalIndexerTuple,
Scalar,
ScalarIndexer,
SequenceIndexer,
Shape,
npt,
type_t,
Expand Down Expand Up @@ -2017,7 +2022,18 @@ def __repr__(self) -> str:

# ------------------------------------------------------------------

def __getitem__(self, key):
@overload
def __getitem__(self, key: ScalarIndexer) -> Any:
...

@overload
def __getitem__(
self: CategoricalT,
key: SequenceIndexer | PositionalIndexerTuple,
) -> CategoricalT:
...

def __getitem__(self: CategoricalT, key: PositionalIndexer2D) -> CategoricalT | Any:
"""
Return an item.
"""
Expand Down
33 changes: 24 additions & 9 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@
DtypeObj,
NpDtype,
PositionalIndexer2D,
PositionalIndexerTuple,
ScalarIndexer,
SequenceIndexer,
npt,
)
from pandas.compat.numpy import function as nv
Expand Down Expand Up @@ -313,17 +316,33 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
return np.array(list(self), dtype=object)
return self._ndarray

@overload
def __getitem__(self, item: ScalarIndexer) -> DTScalarOrNaT:
...

@overload
def __getitem__(
self, key: PositionalIndexer2D
) -> DatetimeLikeArrayMixin | DTScalarOrNaT:
self: DatetimeLikeArrayT,
item: SequenceIndexer | PositionalIndexerTuple,
) -> DatetimeLikeArrayT:
...

def __getitem__(
self: DatetimeLikeArrayT, key: PositionalIndexer2D
) -> DatetimeLikeArrayT | DTScalarOrNaT:
"""
This getitem defers to the underlying array, which by-definition can
only handle list-likes, slices, and integer scalars
"""
result = super().__getitem__(key)
# Use cast as we know we will get back a DatetimeLikeArray or DTScalar
result = cast(
Union[DatetimeLikeArrayT, DTScalarOrNaT], super().__getitem__(key)
)
if lib.is_scalar(result):
return result

else:
# At this point we know the result is an array.
result = cast(DatetimeLikeArrayT, result)
result._freq = self._get_getitem_freq(key)
return result

Expand Down Expand Up @@ -1768,11 +1787,7 @@ def factorize(self, na_sentinel=-1, sort: bool = False):
uniques = self.copy() # TODO: copy or view?
if sort and self.freq.n < 0:
codes = codes[::-1]
# TODO: overload __getitem__, a slice indexer returns same type as self
# error: Incompatible types in assignment (expression has type
# "Union[DatetimeLikeArrayMixin, Union[Any, Any]]", variable
# has type "TimelikeOps")
uniques = uniques[::-1] # type: ignore[assignment]
uniques = uniques[::-1]
return codes, uniques
# FIXME: shouldn't get here; we are ignoring sort
return super().factorize(na_sentinel=na_sentinel)
Expand Down
7 changes: 2 additions & 5 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from typing import (
TYPE_CHECKING,
Literal,
cast,
overload,
)
import warnings
Expand Down Expand Up @@ -478,11 +477,9 @@ def _generate_range(
index = cls._simple_new(arr, freq=None, dtype=dtype)

if not left_closed and len(index) and index[0] == start:
# TODO: overload DatetimeLikeArrayMixin.__getitem__
index = cast(DatetimeArray, index[1:])
index = index[1:]
if not right_closed and len(index) and index[-1] == end:
# TODO: overload DatetimeLikeArrayMixin.__getitem__
index = cast(DatetimeArray, index[:-1])
index = index[:-1]

dtype = tz_to_dtype(tz)
return cls._simple_new(index._ndarray, freq=freq, dtype=dtype)
Expand Down
27 changes: 22 additions & 5 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
from typing import (
Sequence,
TypeVar,
Union,
cast,
overload,
)

import numpy as np
Expand All @@ -31,6 +33,9 @@
ArrayLike,
Dtype,
NpDtype,
PositionalIndexer,
ScalarIndexer,
SequenceIndexer,
)
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender
Expand Down Expand Up @@ -89,6 +94,7 @@
)

IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray")
IntervalOrNA = Union[Interval, float]

_interval_shared_docs: dict[str, str] = {}

Expand Down Expand Up @@ -635,7 +641,17 @@ def __iter__(self):
def __len__(self) -> int:
return len(self._left)

def __getitem__(self, key):
@overload
def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA:
...

@overload
def __getitem__(self: IntervalArrayT, key: SequenceIndexer) -> IntervalArrayT:
...

def __getitem__(
self: IntervalArrayT, key: PositionalIndexer
) -> IntervalArrayT | IntervalOrNA:
key = check_array_indexer(self, key)
left = self._left[key]
right = self._right[key]
Expand Down Expand Up @@ -1633,10 +1649,11 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray:
return self._shallow_copy(left=new_left, right=new_right)

def unique(self) -> IntervalArray:
# Invalid index type "Tuple[slice, int]" for "Union[ExtensionArray,
# ndarray[Any, Any]]"; expected type "Union[int, integer[Any], slice,
# Sequence[int], ndarray[Any, Any]]"
nc = unique(self._combined.view("complex128")[:, 0]) # type: ignore[index]
# No overload variant of "__getitem__" of "ExtensionArray" matches argument
# type "Tuple[slice, int]"
nc = unique(
self._combined.view("complex128")[:, 0] # type: ignore[call-overload]
)
nc = nc[:, None]
return self._from_combined(nc)

Expand Down
14 changes: 13 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
NpDtype,
PositionalIndexer,
Scalar,
ScalarIndexer,
SequenceIndexer,
npt,
type_t,
)
Expand Down Expand Up @@ -139,7 +141,17 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
def dtype(self) -> BaseMaskedDtype:
raise AbstractMethodError(self)

def __getitem__(self, item: PositionalIndexer) -> BaseMaskedArray | Any:
@overload
def __getitem__(self, item: ScalarIndexer) -> Any:
...

@overload
def __getitem__(self: BaseMaskedArrayT, item: SequenceIndexer) -> BaseMaskedArrayT:
...

def __getitem__(
self: BaseMaskedArrayT, item: PositionalIndexer
) -> BaseMaskedArrayT | Any:
if is_integer(item):
if self._mask[item]:
return self.dtype.na_value
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
TYPE_CHECKING,
Any,
Callable,
Literal,
Sequence,
)

Expand Down Expand Up @@ -76,7 +77,6 @@
import pandas.core.common as com

if TYPE_CHECKING:
from typing import Literal

from pandas._typing import (
NumpySorter,
Expand Down
Loading