
Commit 8a5afc2

TYP: lib.pyi (#40772)
1 parent 804e08e commit 8a5afc2

16 files changed: +281 -26 lines changed


pandas/_libs/lib.pyi

+200
@@ -0,0 +1,200 @@
# TODO(npdtypes): Many types specified here can be made more specific/accurate;
# the more specific versions are specified in comments

from typing import (
    Any,
    Callable,
)

import numpy as np

from pandas._typing import ArrayLike

# placeholder until we can specify np.ndarray[object, ndim=2]
ndarray_obj_2d = np.ndarray

from enum import Enum

class NoDefault(Enum):
    ...

no_default: NoDefault


def item_from_zerodim(val: object) -> object: ...
def infer_dtype(value: object, skipna: bool = True) -> str: ...

def is_iterator(obj: object) -> bool: ...
def is_scalar(val: object) -> bool: ...
def is_list_like(obj: object, allow_sets: bool = True) -> bool: ...

def is_period(val: object) -> bool: ...
def is_interval(val: object) -> bool: ...
def is_decimal(val: object) -> bool: ...
def is_complex(val: object) -> bool: ...
def is_bool(val: object) -> bool: ...
def is_integer(val: object) -> bool: ...
def is_float(val: object) -> bool: ...

def is_interval_array(values: np.ndarray) -> bool: ...
def is_period_array(values: np.ndarray) -> bool: ...
def is_datetime64_array(values: np.ndarray) -> bool: ...
def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ...
def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ...

def is_time_array(values: np.ndarray, skipna: bool = False): ...
def is_date_array(values: np.ndarray, skipna: bool = False): ...
def is_datetime_array(values: np.ndarray, skipna: bool = False): ...
def is_string_array(values: np.ndarray, skipna: bool = False): ...
def is_float_array(values: np.ndarray, skipna: bool = False): ...
def is_integer_array(values: np.ndarray, skipna: bool = False): ...
def is_bool_array(values: np.ndarray, skipna: bool = False): ...

def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> ArrayLike: ...

# TODO: gen: Generator?
def fast_unique_multiple_list_gen(gen: object, sort: bool = True) -> list: ...
def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ...
def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ...

def map_infer(
    arr: np.ndarray, f: Callable[[Any], Any], convert: bool = True, ignore_na: bool = False
) -> ArrayLike: ...

def maybe_convert_objects(
    objects: np.ndarray,  # np.ndarray[object]
    try_float: bool = False,
    safe: bool = False,
    convert_datetime: bool = False,
    convert_timedelta: bool = False,
    convert_to_nullable_integer: bool = False,
) -> ArrayLike: ...

def maybe_convert_numeric(
    values: np.ndarray,  # np.ndarray[object]
    na_values: set,
    convert_empty: bool = True,
    coerce_numeric: bool = False,
) -> np.ndarray: ...

# TODO: restrict `arr`?
def ensure_string_array(
    arr,
    na_value: object = np.nan,
    convert_na_value: bool = True,
    copy: bool = True,
    skipna: bool = True,
) -> np.ndarray: ...  # np.ndarray[object]

def infer_datetimelike_array(
    arr: np.ndarray  # np.ndarray[object]
) -> str: ...

# TODO: new_dtype -> np.dtype?
def astype_intsafe(
    arr: np.ndarray,  # np.ndarray[object]
    new_dtype,
) -> np.ndarray: ...

def fast_zip(ndarrays: list) -> np.ndarray: ...  # np.ndarray[object]

# TODO: can we be more specific about rows?
def to_object_array_tuples(rows: object) -> ndarray_obj_2d: ...

def tuples_to_object_array(
    tuples: np.ndarray  # np.ndarray[object]
) -> ndarray_obj_2d: ...

# TODO: can we be more specific about rows?
def to_object_array(rows: object, min_width: int = 0) -> ndarray_obj_2d: ...

def dicts_to_array(dicts: list, columns: list) -> ndarray_obj_2d: ...


def maybe_booleans_to_slice(
    mask: np.ndarray  # ndarray[uint8_t]
) -> slice | np.ndarray: ...  # np.ndarray[np.uint8]

def maybe_indices_to_slice(
    indices: np.ndarray,  # np.ndarray[np.intp]
    max_len: int,
) -> slice | np.ndarray: ...  # np.ndarray[np.uint8]

def clean_index_list(obj: list) -> tuple[
    list | np.ndarray,  # np.ndarray[object] | np.ndarray[np.int64]
    bool,
]: ...


# -----------------------------------------------------------------
# Functions which in reality take memoryviews

def memory_usage_of_objects(
    arr: np.ndarray  # object[:]
) -> int: ...  # np.int64


# TODO: f: Callable?
# TODO: dtype -> DtypeObj?
def map_infer_mask(
    arr: np.ndarray,
    f: Callable[[Any], Any],
    mask: np.ndarray,  # const uint8_t[:]
    convert: bool = ...,
    na_value: Any = ...,
    dtype: Any = ...,
) -> ArrayLike: ...

def indices_fast(
    index: np.ndarray,  # ndarray[intp_t]
    labels: np.ndarray,  # const int64_t[:]
    keys: list,
    sorted_labels: list[np.ndarray],  # list[ndarray[np.int64]]
) -> dict: ...

def generate_slices(
    labels: np.ndarray,  # const intp_t[:]
    ngroups: int
) -> tuple[
    np.ndarray,  # np.ndarray[np.int64]
    np.ndarray,  # np.ndarray[np.int64]
]: ...

def count_level_2d(
    mask: np.ndarray,  # ndarray[uint8_t, ndim=2, cast=True],
    labels: np.ndarray,  # const intp_t[:]
    max_bin: int,
    axis: int
) -> np.ndarray: ...  # np.ndarray[np.int64, ndim=2]

def get_level_sorter(
    label: np.ndarray,  # const int64_t[:]
    starts: np.ndarray,  # const intp_t[:]
) -> np.ndarray: ...  # np.ndarray[np.intp, ndim=1]


def generate_bins_dt64(
    values: np.ndarray,  # np.ndarray[np.int64]
    binner: np.ndarray,  # const int64_t[:]
    closed: object = "left",
    hasnans: bool = False,
) -> np.ndarray: ...  # np.ndarray[np.int64, ndim=1]


def array_equivalent_object(
    left: np.ndarray,  # object[:]
    right: np.ndarray,  # object[:]
) -> bool: ...

def has_infs_f8(
    arr: np.ndarray  # const float64_t[:]
) -> bool: ...

def has_infs_f4(
    arr: np.ndarray  # const float32_t[:]
) -> bool: ...

def get_reverse_indexer(
    indexer: np.ndarray,  # const intp_t[:]
    length: int,
) -> np.ndarray: ...  # np.ndarray[np.intp]
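
As a rough, hedged illustration (mine, not part of the commit): with lib.pyi sitting next to the compiled pandas._libs.lib extension, mypy resolves calls against the signatures above instead of treating the module as untyped. The sketch below only uses functions declared in the stub and assumes an installed pandas.

# Hedged example, not pandas code.
import numpy as np

from pandas._libs import lib

values = np.array(["a", "b", None], dtype=object)

# Declared above as (value: object, skipna: bool = True) -> str, so `kind`
# type-checks as str rather than Any.
kind: str = lib.infer_dtype(values, skipna=True)

# Declared above to return ArrayLike (Union[ExtensionArray, np.ndarray]);
# callers that need a plain ndarray must narrow, cast, or ignore -- which is
# what the changes in the files below do.
converted = lib.maybe_convert_objects(values)
if isinstance(converted, np.ndarray):
    print(kind, converted.dtype)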

pandas/_testing/asserters.py

+16-5
@@ -6,7 +6,10 @@
 
 import numpy as np
 
-from pandas._libs.lib import no_default
+from pandas._libs.lib import (
+    NoDefault,
+    no_default,
+)
 from pandas._libs.missing import is_matching_na
 import pandas._libs.testing as _testing
 
@@ -54,7 +57,7 @@ def assert_almost_equal(
     left,
     right,
     check_dtype: Union[bool, str] = "equiv",
-    check_less_precise: Union[bool, int] = no_default,
+    check_less_precise: Union[bool, int, NoDefault] = no_default,
     rtol: float = 1.0e-5,
     atol: float = 1.0e-8,
     **kwargs,
@@ -104,7 +107,11 @@ def assert_almost_equal(
             FutureWarning,
             stacklevel=2,
         )
-        rtol = atol = _get_tol_from_less_precise(check_less_precise)
+        # error: Argument 1 to "_get_tol_from_less_precise" has incompatible
+        # type "Union[bool, int, NoDefault]"; expected "Union[bool, int]"
+        rtol = atol = _get_tol_from_less_precise(
+            check_less_precise  # type: ignore[arg-type]
+        )
 
     if isinstance(left, Index):
         assert_index_equal(
@@ -242,7 +249,7 @@ def assert_index_equal(
     right: Index,
     exact: Union[bool, str] = "equiv",
     check_names: bool = True,
-    check_less_precise: Union[bool, int] = no_default,
+    check_less_precise: Union[bool, int, NoDefault] = no_default,
     check_exact: bool = True,
     check_categorical: bool = True,
     check_order: bool = True,
@@ -331,7 +338,11 @@ def _get_ilevel_values(index, level):
             FutureWarning,
             stacklevel=2,
         )
-        rtol = atol = _get_tol_from_less_precise(check_less_precise)
+        # error: Argument 1 to "_get_tol_from_less_precise" has incompatible
+        # type "Union[bool, int, NoDefault]"; expected "Union[bool, int]"
+        rtol = atol = _get_tol_from_less_precise(
+            check_less_precise  # type: ignore[arg-type]
+        )
 
     # instance validation
     _check_isinstance(left, right, Index)
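
The changes above follow the deprecated-keyword pattern: check_less_precise defaults to the no_default sentinel, and once lib.pyi types that sentinel as NoDefault, the annotation has to admit it. A hedged sketch of the same pattern with an invented `tolerance` keyword (my example, not pandas code):

# Hedged sketch; `compute_tol` and `tolerance` are made-up names.
from __future__ import annotations

import warnings
from typing import Union

from pandas._libs.lib import NoDefault, no_default


def compute_tol(tolerance: Union[bool, int, NoDefault] = no_default) -> float:
    if tolerance is no_default:
        # keyword not passed: use the modern default
        return 1.0e-5
    warnings.warn("'tolerance' is deprecated", FutureWarning, stacklevel=2)
    # mypy still sees Union[bool, int, NoDefault] on this path, which is why
    # assert_almost_equal above needs a scoped `# type: ignore[arg-type]`.
    return 0.5 ** int(tolerance)  # type: ignore[arg-type]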

pandas/core/apply.py

+10-1
@@ -1022,7 +1022,16 @@ def apply_standard(self) -> FrameOrSeriesUnion:
                 mapped = obj._values.map(f)
             else:
                 values = obj.astype(object)._values
-                mapped = lib.map_infer(values, f, convert=self.convert_dtype)
+                # error: Argument 2 to "map_infer" has incompatible type
+                # "Union[Callable[..., Any], str, List[Union[Callable[..., Any], str]],
+                # Dict[Hashable, Union[Union[Callable[..., Any], str],
+                # List[Union[Callable[..., Any], str]]]]]"; expected
+                # "Callable[[Any], Any]"
+                mapped = lib.map_infer(
+                    values,
+                    f,  # type: ignore[arg-type]
+                    convert=self.convert_dtype,
+                )
 
         if len(mapped) and isinstance(mapped[0], ABCSeries):
             # GH 25959 use pd.array instead of tolist

pandas/core/arrays/datetimelike.py

+3-1
@@ -291,7 +291,9 @@ def _box_values(self, values) -> np.ndarray:
         """
         apply box func to passed values
         """
-        return lib.map_infer(values, self._box_func)
+        # error: Incompatible return value type (got
+        # "Union[ExtensionArray, ndarray]", expected "ndarray")
+        return lib.map_infer(values, self._box_func)  # type: ignore[return-value]
 
     def __iter__(self):
         if self.ndim > 1:

pandas/core/arrays/string_.py

+3-1
@@ -441,7 +441,9 @@ def _str_map(self, f, na_value=None, dtype: Dtype | None = None):
             if not na_value_is_na:
                 mask[:] = False
 
-            return constructor(result, mask)
+            # error: Argument 1 to "maybe_convert_objects" has incompatible
+            # type "Union[ExtensionArray, ndarray]"; expected "ndarray"
+            return constructor(result, mask)  # type: ignore[arg-type]
 
         elif is_string_dtype(dtype) and not is_object_dtype(dtype):
             # i.e. StringDtype

pandas/core/base.py

+2-1
@@ -1194,7 +1194,8 @@ def _memory_usage(self, deep: bool = False) -> int:
 
         v = self.array.nbytes
         if deep and is_object_dtype(self) and not PYPY:
-            v += lib.memory_usage_of_objects(self._values)
+            values = cast(np.ndarray, self._values)
+            v += lib.memory_usage_of_objects(values)
         return v
 
     @doc(

pandas/core/dtypes/cast.py

+10-5
@@ -121,18 +121,19 @@ def maybe_convert_platform(
     values: list | tuple | range | np.ndarray | ExtensionArray,
 ) -> ArrayLike:
     """ try to do platform conversion, allow ndarray or list here """
+    arr: ArrayLike
+
     if isinstance(values, (list, tuple, range)):
         arr = construct_1d_object_array_from_listlike(values)
     else:
         # The caller is responsible for ensuring that we have np.ndarray
         # or ExtensionArray here.
-
-        # error: Incompatible types in assignment (expression has type "Union[ndarray,
-        # ExtensionArray]", variable has type "ndarray")
-        arr = values  # type: ignore[assignment]
+        arr = values
 
     if arr.dtype == object:
-        arr = lib.maybe_convert_objects(arr)
+        # error: Argument 1 to "maybe_convert_objects" has incompatible type
+        # "Union[ExtensionArray, ndarray]"; expected "ndarray"
+        arr = lib.maybe_convert_objects(arr)  # type: ignore[arg-type]
 
     return arr
 
@@ -1434,9 +1435,13 @@ def convert_dtypes(
 
     Returns
     -------
+    str, np.dtype, or ExtensionDtype
     dtype
         new dtype
     """
+    inferred_dtype: str | np.dtype | ExtensionDtype
+    # TODO: rule out str
+
     if (
         convert_string or convert_integer or convert_boolean or convert_floating
     ) and isinstance(input_array, np.ndarray):
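
The maybe_convert_platform edit above shows the other recurring pattern in this commit: pre-declare the variable with the wide ArrayLike type so both branch assignments type-check without an ignore. A hedged, standalone sketch of that idea (invented helper name, not a pandas function):

# Hedged sketch of the pre-declaration pattern.
from __future__ import annotations

import numpy as np

from pandas._typing import ArrayLike


def to_1d_array_like(values: list | ArrayLike) -> ArrayLike:
    arr: ArrayLike  # declare the wide type once; neither branch needs an ignore
    if isinstance(values, list):
        arr = np.asarray(values, dtype=object)
    else:
        # already an ndarray or ExtensionArray, both covered by ArrayLike
        arr = values
    return arr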

pandas/core/dtypes/inference.py

+3
@@ -440,6 +440,9 @@ def is_inferred_bool_dtype(arr: ArrayLike) -> bool:
     This does not include the special treatment is_bool_dtype uses for
     Categorical.
     """
+    if not isinstance(arr, np.ndarray):
+        return False
+
     dtype = arr.dtype
     if dtype == np.dtype(bool):
         return True
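
The guard added above relies on runtime narrowing instead of a type: ignore -- after the isinstance check, both mypy and the reader know arr is an np.ndarray, so the dtype logic below it is unambiguous. A self-contained version of the same idea, for illustration only (not pandas code):

# Hedged illustration of isinstance narrowing on ArrayLike.
from __future__ import annotations

import numpy as np
import pandas as pd

from pandas._typing import ArrayLike


def ndarray_is_bool(arr: ArrayLike) -> bool:
    if not isinstance(arr, np.ndarray):
        # ExtensionArrays take this branch, keeping the ndarray-only code safe
        return False
    return arr.dtype == np.dtype(bool)


print(ndarray_is_bool(np.array([True, False])))  # True
print(ndarray_is_bool(pd.array([True, None])))   # False (BooleanArray)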

pandas/core/frame.py

+11-3
@@ -2021,7 +2021,13 @@ def from_records(
         if coerce_float:
             for i, arr in enumerate(arrays):
                 if arr.dtype == object:
-                    arrays[i] = lib.maybe_convert_objects(arr, try_float=True)
+                    # error: Argument 1 to "maybe_convert_objects" has
+                    # incompatible type "Union[ExtensionArray, ndarray]";
+                    # expected "ndarray"
+                    arrays[i] = lib.maybe_convert_objects(
+                        arr,  # type: ignore[arg-type]
+                        try_float=True,
+                    )
 
         arr_columns = ensure_index(arr_columns)
         if columns is None:
@@ -7425,7 +7431,7 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        squeeze: bool = no_default,
+        squeeze: bool | lib.NoDefault = no_default,
         observed: bool = False,
         dropna: bool = True,
     ) -> DataFrameGroupBy:
@@ -7447,6 +7453,8 @@ def groupby(
             raise TypeError("You have to supply one of 'by' and 'level'")
         axis = self._get_axis_number(axis)
 
+        # error: Argument "squeeze" to "DataFrameGroupBy" has incompatible type
+        # "Union[bool, NoDefault]"; expected "bool"
         return DataFrameGroupBy(
             obj=self,
             keys=by,
@@ -7455,7 +7463,7 @@ def groupby(
             as_index=as_index,
             sort=sort,
             group_keys=group_keys,
-            squeeze=squeeze,
+            squeeze=squeeze,  # type: ignore[arg-type]
             observed=observed,
             dropna=dropna,
         )
