Skip to content

Commit 486b440

Browse files
authored
TYP: mostly Hashtable and ArrowExtensionArray (pandas-dev#56689)
* TYP: mostly Hashtable and ArrowExtensionArray * fix mypy stubtest * and return types for core.arrays * pyupgrade * runtime actually expects np.bool_ (calls .reshape(1) on it) * TypeVar * return bool | NAType * isort
1 parent dffa51f commit 486b440

23 files changed

+343
-218
lines changed

pandas/_libs/hashtable.pyi

+22-9
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ from typing import (
22
Any,
33
Hashable,
44
Literal,
5+
overload,
56
)
67

78
import numpy as np
@@ -180,18 +181,30 @@ class HashTable:
180181
na_value: object = ...,
181182
mask=...,
182183
) -> npt.NDArray[np.intp]: ...
184+
@overload
183185
def unique(
184186
self,
185187
values: np.ndarray, # np.ndarray[subclass-specific]
186-
return_inverse: bool = ...,
187-
mask=...,
188-
) -> (
189-
tuple[
190-
np.ndarray, # np.ndarray[subclass-specific]
191-
npt.NDArray[np.intp],
192-
]
193-
| np.ndarray
194-
): ... # np.ndarray[subclass-specific]
188+
*,
189+
return_inverse: Literal[False] = ...,
190+
mask: None = ...,
191+
) -> np.ndarray: ... # np.ndarray[subclass-specific]
192+
@overload
193+
def unique(
194+
self,
195+
values: np.ndarray, # np.ndarray[subclass-specific]
196+
*,
197+
return_inverse: Literal[True],
198+
mask: None = ...,
199+
) -> tuple[np.ndarray, npt.NDArray[np.intp],]: ... # np.ndarray[subclass-specific]
200+
@overload
201+
def unique(
202+
self,
203+
values: np.ndarray, # np.ndarray[subclass-specific]
204+
*,
205+
return_inverse: Literal[False] = ...,
206+
mask: npt.NDArray[np.bool_],
207+
) -> tuple[np.ndarray, npt.NDArray[np.bool_],]: ... # np.ndarray[subclass-specific]
195208
def factorize(
196209
self,
197210
values: np.ndarray, # np.ndarray[subclass-specific]

pandas/_libs/hashtable_class_helper.pxi.in

+3-3
Original file line number | Diff line number | Diff line change
@@ -755,7 +755,7 @@ cdef class {{name}}HashTable(HashTable):
755755
return uniques.to_array(), result_mask.to_array()
756756
return uniques.to_array()
757757

758-
def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False, object mask=None):
758+
def unique(self, const {{dtype}}_t[:] values, *, bint return_inverse=False, object mask=None):
759759
"""
760760
Calculate unique values and labels (no sorting!)
761761

@@ -1180,7 +1180,7 @@ cdef class StringHashTable(HashTable):
11801180
return uniques.to_array(), labels.base # .base -> underlying ndarray
11811181
return uniques.to_array()
11821182

1183-
def unique(self, ndarray[object] values, bint return_inverse=False, object mask=None):
1183+
def unique(self, ndarray[object] values, *, bint return_inverse=False, object mask=None):
11841184
"""
11851185
Calculate unique values and labels (no sorting!)
11861186

@@ -1438,7 +1438,7 @@ cdef class PyObjectHashTable(HashTable):
14381438
return uniques.to_array(), labels.base # .base -> underlying ndarray
14391439
return uniques.to_array()
14401440

1441-
def unique(self, ndarray[object] values, bint return_inverse=False, object mask=None):
1441+
def unique(self, ndarray[object] values, *, bint return_inverse=False, object mask=None):
14421442
"""
14431443
Calculate unique values and labels (no sorting!)
14441444

pandas/_typing.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,7 @@
109109
# array-like
110110

111111
ArrayLike = Union["ExtensionArray", np.ndarray]
112+
ArrayLikeT = TypeVar("ArrayLikeT", "ExtensionArray", np.ndarray)
112113
AnyArrayLike = Union[ArrayLike, "Index", "Series"]
113114
TimeArrayLike = Union["DatetimeArray", "TimedeltaArray"]
114115

@@ -137,7 +138,7 @@ def __len__(self) -> int:
137138
def __iter__(self) -> Iterator[_T_co]:
138139
...
139140

140-
def index(self, value: Any, /, start: int = 0, stop: int = ...) -> int:
141+
def index(self, value: Any, start: int = ..., stop: int = ..., /) -> int:
141142
...
142143

143144
def count(self, value: Any, /) -> int:

pandas/compat/pickle_compat.py

+6-3
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,10 @@
77
import copy
88
import io
99
import pickle as pkl
10-
from typing import TYPE_CHECKING
10+
from typing import (
11+
TYPE_CHECKING,
12+
Any,
13+
)
1114

1215
import numpy as np
1316

@@ -209,7 +212,7 @@ def load_newobj_ex(self) -> None:
209212
pass
210213

211214

212-
def load(fh, encoding: str | None = None, is_verbose: bool = False):
215+
def load(fh, encoding: str | None = None, is_verbose: bool = False) -> Any:
213216
"""
214217
Load a pickle, with a provided encoding,
215218
@@ -239,7 +242,7 @@ def loads(
239242
fix_imports: bool = True,
240243
encoding: str = "ASCII",
241244
errors: str = "strict",
242-
):
245+
) -> Any:
243246
"""
244247
Analogous to pickle._loads.
245248
"""

pandas/core/accessor.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ class PandasDelegate:
5454
def _delegate_property_get(self, name: str, *args, **kwargs):
5555
raise TypeError(f"You cannot access the property {name}")
5656

57-
def _delegate_property_set(self, name: str, value, *args, **kwargs):
57+
def _delegate_property_set(self, name: str, value, *args, **kwargs) -> None:
5858
raise TypeError(f"The property {name} cannot be set")
5959

6060
def _delegate_method(self, name: str, *args, **kwargs):

pandas/core/algorithms.py

+12-5
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
from pandas._typing import (
2626
AnyArrayLike,
2727
ArrayLike,
28+
ArrayLikeT,
2829
AxisInt,
2930
DtypeObj,
3031
TakeIndexer,
@@ -182,8 +183,8 @@ def _ensure_data(values: ArrayLike) -> np.ndarray:
182183

183184

184185
def _reconstruct_data(
185-
values: ArrayLike, dtype: DtypeObj, original: AnyArrayLike
186-
) -> ArrayLike:
186+
values: ArrayLikeT, dtype: DtypeObj, original: AnyArrayLike
187+
) -> ArrayLikeT:
187188
"""
188189
reverse of _ensure_data
189190
@@ -206,7 +207,9 @@ def _reconstruct_data(
206207
# that values.dtype == dtype
207208
cls = dtype.construct_array_type()
208209

209-
values = cls._from_sequence(values, dtype=dtype)
210+
# error: Incompatible types in assignment (expression has type
211+
# "ExtensionArray", variable has type "ndarray[Any, Any]")
212+
values = cls._from_sequence(values, dtype=dtype) # type: ignore[assignment]
210213

211214
else:
212215
values = values.astype(dtype, copy=False)
@@ -259,7 +262,9 @@ def _ensure_arraylike(values, func_name: str) -> ArrayLike:
259262
}
260263

261264

262-
def _get_hashtable_algo(values: np.ndarray):
265+
def _get_hashtable_algo(
266+
values: np.ndarray,
267+
) -> tuple[type[htable.HashTable], np.ndarray]:
263268
"""
264269
Parameters
265270
----------
@@ -1550,7 +1555,9 @@ def safe_sort(
15501555
hash_klass, values = _get_hashtable_algo(values) # type: ignore[arg-type]
15511556
t = hash_klass(len(values))
15521557
t.map_locations(values)
1553-
sorter = ensure_platform_int(t.lookup(ordered))
1558+
# error: Argument 1 to "lookup" of "HashTable" has incompatible type
1559+
# "ExtensionArray | ndarray[Any, Any] | Index | Series"; expected "ndarray"
1560+
sorter = ensure_platform_int(t.lookup(ordered)) # type: ignore[arg-type]
15541561

15551562
if use_na_sentinel:
15561563
# take_nd is faster, but only works for na_sentinels of -1

0 commit comments

Comments
 (0)