-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
TYP: ExtensionArray delete() and searchsorted() #41513
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
1bf4b5d
16bcc1a
c0d426f
cce79a9
0a9cd7a
f37d215
22b147a
065454f
cde198e
b986eba
2d85ddb
493ab41
f8f8b93
704abef
f8d4267
9c391a0
4e60e04
6a888ef
2370384
42802ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,6 +91,20 @@ | |
PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] | ||
Scalar = Union[PythonScalar, PandasScalar] | ||
|
||
# numpy compatible types | ||
NumpyValueArrayLike = Union[PythonScalar, Sequence[PythonScalar]] | ||
NumpySorter = Union[ | ||
int, | ||
np.integer, | ||
bool, | ||
np.ndarray, | ||
Sequence[int], | ||
Sequence[np.integer], | ||
Sequence[bool], | ||
None, | ||
] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is equivalent to also use Optional[...] instead of Union[..., None]. https://pandas.pydata.org/pandas-docs/dev/development/contributing_codebase.html#style-guidelines (Although the docs need updating for the new syntax, not relevant for aliases) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Redefined as suggested in next commit. |
||
|
||
|
||
# timestamp and timedelta convertible types | ||
|
||
TimestampConvertibleTypes = Union[ | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -30,6 +30,8 @@ | |||||
ArrayLike, | ||||||
DtypeObj, | ||||||
FrameOrSeriesUnion, | ||||||
NumpySorter, | ||||||
NumpyValueArrayLike, | ||||||
Scalar, | ||||||
) | ||||||
from pandas.util._decorators import doc | ||||||
|
@@ -1507,7 +1509,12 @@ def take( | |||||
# ------------ # | ||||||
|
||||||
|
||||||
def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray: | ||||||
def searchsorted( | ||||||
arr: ArrayLike, | ||||||
value: NumpyValueArrayLike, | ||||||
side: Literal["left", "right"] = "left", | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is npt._SortSide? we probably don't want to use private aliases in the codebase, but maybe OK to do so in pandas._typing. thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't use |
||||||
sorter: NumpySorter = None, | ||||||
) -> np.ndarray: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
although if we return the value from numpy unchanged, the return type is docstring also needs updating? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is |
||||||
""" | ||||||
Find indices where elements should be inserted to maintain order. | ||||||
|
||||||
|
@@ -1532,7 +1539,7 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray: | |||||
Input array. If `sorter` is None, then it must be sorted in | ||||||
ascending order, otherwise `sorter` must be an array of indices | ||||||
that sort it. | ||||||
value : array_like | ||||||
value : array_like or single value | ||||||
Values to insert into `arr`. | ||||||
side : {'left', 'right'}, optional | ||||||
If 'left', the index of the first suitable location found is given. | ||||||
|
@@ -1573,9 +1580,10 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray: | |||||
dtype = value_arr.dtype | ||||||
|
||||||
if is_scalar(value): | ||||||
value = dtype.type(value) | ||||||
# We know that value is int | ||||||
Dr-Irv marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
value = cast(int, dtype.type(value)) | ||||||
else: | ||||||
value = pd_array(value, dtype=dtype) | ||||||
value = pd_array(cast(ArrayLike, value), dtype=dtype) | ||||||
Dr-Irv marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
elif not ( | ||||||
is_object_dtype(arr) or is_numeric_dtype(arr) or is_categorical_dtype(arr) | ||||||
): | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
|
||
from functools import wraps | ||
from typing import ( | ||
TYPE_CHECKING, | ||
Any, | ||
Sequence, | ||
TypeVar, | ||
|
@@ -13,7 +14,9 @@ | |
from pandas._libs import lib | ||
from pandas._libs.arrays import NDArrayBacked | ||
from pandas._typing import ( | ||
ArrayLike, | ||
F, | ||
NumpySorter, | ||
PositionalIndexer2D, | ||
Shape, | ||
type_t, | ||
|
@@ -45,6 +48,9 @@ | |
"NDArrayBackedExtensionArrayT", bound="NDArrayBackedExtensionArray" | ||
) | ||
|
||
if TYPE_CHECKING: | ||
from typing import Literal | ||
|
||
|
||
def ravel_compat(meth: F) -> F: | ||
""" | ||
|
@@ -176,9 +182,14 @@ def _concat_same_type( | |
return to_concat[0]._from_backing_data(new_values) # type: ignore[arg-type] | ||
|
||
@doc(ExtensionArray.searchsorted) | ||
def searchsorted(self, value, side="left", sorter=None): | ||
value = self._validate_searchsorted_value(value) | ||
return self._ndarray.searchsorted(value, side=side, sorter=sorter) | ||
def searchsorted( | ||
self, | ||
value: ArrayLike | object, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. object -> Scalar? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would violate the Liskov substitution principle. Our There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess that would be bc you've typed EA.searchsorted with object, so why not change it there? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ArrayLike is a subtype of object so not needed? Why does this not use the |
||
side: Literal["left", "right"] = "left", | ||
sorter: NumpySorter = None, | ||
) -> np.ndarray: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also int? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, |
||
npvalue: np.ndarray = cast(np.ndarray, self._validate_searchsorted_value(value)) | ||
Dr-Irv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return self._ndarray.searchsorted(npvalue, side=side, sorter=sorter) | ||
|
||
def _validate_searchsorted_value(self, value): | ||
return value | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
ArrayLike, | ||
Dtype, | ||
FillnaOptions, | ||
NumpySorter, | ||
PositionalIndexer, | ||
Shape, | ||
) | ||
|
@@ -812,7 +813,12 @@ def unique(self: ExtensionArrayT) -> ExtensionArrayT: | |
uniques = unique(self.astype(object)) | ||
return self._from_sequence(uniques, dtype=self.dtype) | ||
|
||
def searchsorted(self, value, side="left", sorter=None): | ||
def searchsorted( | ||
self, | ||
value: ArrayLike | object, | ||
side: Literal["left", "right"] = "left", | ||
sorter: NumpySorter = None, | ||
) -> np.ndarray: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also int? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, |
||
""" | ||
Find indices where elements should be inserted to maintain order. | ||
|
||
|
@@ -833,7 +839,7 @@ def searchsorted(self, value, side="left", sorter=None): | |
|
||
Parameters | ||
---------- | ||
value : array_like | ||
value : array_like or a single value | ||
Values to insert into `self`. | ||
side : {'left', 'right'}, optional | ||
If 'left', the index of the first suitable location found is given. | ||
|
@@ -1307,7 +1313,7 @@ def __hash__(self) -> int: | |
# ------------------------------------------------------------------------ | ||
# Non-Optimized Default Methods | ||
|
||
def delete(self: ExtensionArrayT, loc) -> ExtensionArrayT: | ||
def delete(self: ExtensionArrayT, loc: PositionalIndexer) -> ExtensionArrayT: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is in the 'public' base EA class. it was added in #39405. @jbrockmendel is this method public? Is it part of the EA interface? does it need a docstring? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch ill make a note to add a docstring |
||
indexer = np.delete(np.arange(len(self)), loc) | ||
return self.take(indexer) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
Any, | ||
Callable, | ||
Sequence, | ||
cast, | ||
) | ||
|
||
import numpy as np | ||
|
@@ -39,8 +40,10 @@ | |
) | ||
from pandas._typing import ( | ||
AnyArrayLike, | ||
ArrayLike, | ||
Dtype, | ||
NpDtype, | ||
NumpySorter, | ||
) | ||
from pandas.util._decorators import ( | ||
cache_readonly, | ||
|
@@ -74,6 +77,8 @@ | |
import pandas.core.common as com | ||
|
||
if TYPE_CHECKING: | ||
from typing import Literal | ||
|
||
from pandas.core.arrays import DatetimeArray | ||
|
||
_shared_doc_kwargs = { | ||
|
@@ -642,12 +647,19 @@ def astype(self, dtype, copy: bool = True): | |
return self.asfreq(dtype.freq) | ||
return super().astype(dtype, copy=copy) | ||
|
||
def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: | ||
value = self._validate_searchsorted_value(value).view("M8[ns]") | ||
def searchsorted( | ||
self, | ||
value: ArrayLike | object, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think de-facto the scalar needs to be either Period or Period-castable There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, but the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
aren't we supposed to use
so if a subclass is more restrictive in what it accepts (without raising), we can't have that be reflected in the annotation? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If we use
Yes and no. The full signature has to accept the same type as the parent, and can widen that argument type, but not narrow it. But, I think we can use an |
||
side: Literal["left", "right"] = "left", | ||
sorter: NumpySorter = None, | ||
) -> np.ndarray: | ||
npvalue = cast( | ||
np.ndarray, self._validate_searchsorted_value(value).view("M8[ns]") | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we avoid the cast with the typing of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. removed in next commit. |
||
|
||
# Cast to M8 to get datetime-like NaT placement | ||
m8arr = self._ndarray.view("M8[ns]") | ||
return m8arr.searchsorted(value, side=side, sorter=sorter) | ||
return m8arr.searchsorted(npvalue, side=side, sorter=sorter) | ||
|
||
def fillna(self, value=None, method=None, limit=None) -> PeriodArray: | ||
if method is not None: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,8 @@ | |
DtypeObj, | ||
FrameOrSeries, | ||
IndexLabel, | ||
NumpySorter, | ||
NumpyValueArrayLike, | ||
Shape, | ||
final, | ||
) | ||
|
@@ -1226,7 +1228,12 @@ def factorize(self, sort: bool = False, na_sentinel: int | None = -1): | |
""" | ||
|
||
@doc(_shared_docs["searchsorted"], klass="Index") | ||
def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: | ||
def searchsorted( | ||
self, | ||
value: NumpyValueArrayLike, | ||
side: Literal["left", "right"] = "left", | ||
sorter: NumpySorter = None, | ||
) -> np.ndarray: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this can sometimes return an int? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. looks like that doc/signature is inaccurate then
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this seems to be an issue with import numpy as np
a = np.array([1, 3, 5])
# reveal_type(a)
s = a.searchsorted(4)
# reveal_type(s)
print(s, type(s), isinstance(s, np.ndarray)) Result of running this is:
If I uncomment the So that gives us this question: Should the pandas I've submitted a numpy issue here: numpy/numpy#19160 |
||
return algorithms.searchsorted(self._values, value, side=side, sorter=sorter) | ||
|
||
def drop_duplicates(self, keep="first"): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The typing definition of
Sequence
does not include numpy arrays, EAs, Series, Index etc. #28770from numpy types
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Next commit includes that change.