Skip to content

TYP: lib.pyi #40772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 200 additions & 0 deletions pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# TODO(npdtypes): Many types specified here can be made more specific/accurate;
# the more specific versions are specified in comments

from typing import (
Any,
Callable,
)

import numpy as np

from pandas._typing import ArrayLike

# Imports belong ahead of module-level assignments (PEP 8); the enum import
# previously sat below this alias.
from enum import Enum

# placeholder until we can specify np.ndarray[object, ndim=2]
ndarray_obj_2d = np.ndarray

# Enum used purely as the type of the `no_default` sentinel declared below.
class NoDefault(Enum):
    ...

# Declaration only: the sentinel value itself is provided by the
# implementation module this stub describes.
no_default: NoDefault


# Unwraps a value from a zero-dimensional container (per the name — see the
# implementation for exact semantics); accepts and returns arbitrary objects.
def item_from_zerodim(val: object) -> object: ...
# Returns a string naming the inferred dtype of `value`; `skipna` presumably
# controls whether NA values are ignored during inference — confirm in impl.
def infer_dtype(value: object, skipna: bool = True) -> str: ...

# Predicates on arbitrary Python objects; each returns a plain bool.
def is_iterator(obj: object) -> bool: ...
def is_scalar(val: object) -> bool: ...
def is_list_like(obj: object, allow_sets: bool = True) -> bool: ...

# Scalar-kind predicates (period / interval / decimal / complex / bool /
# integer / float).
def is_period(val: object) -> bool: ...
def is_interval(val: object) -> bool: ...
def is_decimal(val: object) -> bool: ...
def is_complex(val: object) -> bool: ...
def is_bool(val: object) -> bool: ...
def is_integer(val: object) -> bool: ...
def is_float(val: object) -> bool: ...

# Whole-array predicates: each takes an ndarray and returns a single bool.
def is_interval_array(values: np.ndarray) -> bool: ...
def is_period_array(values: np.ndarray) -> bool: ...
def is_datetime64_array(values: np.ndarray) -> bool: ...
def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ...
def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ...

# Whole-array predicates with NA-skipping support.  These mirror the group
# above but accept `skipna`; the original stubs omitted the return type —
# annotated `-> bool` here to match the sibling predicates.
def is_time_array(values: np.ndarray, skipna: bool = False) -> bool: ...
def is_date_array(values: np.ndarray, skipna: bool = False) -> bool: ...
def is_datetime_array(values: np.ndarray, skipna: bool = False) -> bool: ...
def is_string_array(values: np.ndarray, skipna: bool = False) -> bool: ...
def is_float_array(values: np.ndarray, skipna: bool = False) -> bool: ...
def is_integer_array(values: np.ndarray, skipna: bool = False) -> bool: ...
def is_bool_array(values: np.ndarray, skipna: bool = False) -> bool: ...

def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> ArrayLike: ...

# Unique-preserving flatteners over a generator / list of lists / list of
# arrays; `sort` presumably sorts the output when possible.
# TODO: gen: Generator?
def fast_unique_multiple_list_gen(gen: object, sort: bool = True) -> list: ...
def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ...
def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ...

# Element-wise application of `f` over `arr` (per the name); `convert` and
# `ignore_na` tune result handling — see implementation.  Returns ArrayLike.
def map_infer(
    arr: np.ndarray, f: Callable[[Any], Any], convert: bool = True, ignore_na: bool = False
) -> ArrayLike: ...

# Best-effort conversion of an object ndarray to a more specific dtype;
# the `convert_*` flags (by name) opt in to datetime / timedelta /
# nullable-integer outputs.
def maybe_convert_objects(
    objects: np.ndarray,  # np.ndarray[object]
    try_float: bool = False,
    safe: bool = False,
    convert_datetime: bool = False,
    convert_timedelta: bool = False,
    convert_to_nullable_integer: bool = False,
) -> ArrayLike: ...

# Numeric coercion of an object ndarray; `na_values` is the set treated as
# missing, `coerce_numeric` presumably forces conversion — confirm in impl.
def maybe_convert_numeric(
    values: np.ndarray,  # np.ndarray[object]
    na_values: set,
    convert_empty: bool = True,
    coerce_numeric: bool = False,
) -> np.ndarray: ...

# Coerces `arr` to an object ndarray of strings, substituting `na_value`
# for missing entries (per the parameter names); `copy` controls whether
# the input may be returned uncopied.
# TODO: restrict `arr`?
def ensure_string_array(
    arr,
    na_value: object = np.nan,
    convert_na_value: bool = True,
    copy: bool = True,
    skipna: bool = True,
) -> np.ndarray: ...  # np.ndarray[object]

# Returns a string describing the datetime-like kind of an object array.
def infer_datetimelike_array(
    arr: np.ndarray  # np.ndarray[object]
) -> str: ...

# Casts an object ndarray to `new_dtype`; "intsafe" per the name — see the
# implementation for overflow semantics.
# TODO: new_dtype -> np.dtype?
def astype_intsafe(
    arr: np.ndarray,  # np.ndarray[object]
    new_dtype,
) -> np.ndarray: ...

# Object-ndarray constructors: zip a list of arrays, or build 2-D object
# arrays (`ndarray_obj_2d`) from rows / tuples / dicts.
def fast_zip(ndarrays: list) -> np.ndarray: ...  # np.ndarray[object]

# TODO: can we be more specific about rows?
def to_object_array_tuples(rows: object) -> ndarray_obj_2d: ...

def tuples_to_object_array(
    tuples: np.ndarray  # np.ndarray[object]
) -> ndarray_obj_2d: ...

# TODO: can we be more specific about rows?
def to_object_array(rows: object, min_width: int = 0) -> ndarray_obj_2d: ...

def dicts_to_array(dicts: list, columns: list) -> ndarray_obj_2d: ...


# Mask/index -> slice converters: return a slice when the input is
# contiguous-representable, otherwise the ndarray (per the union return).
# PEP 604 `X | Y` and builtin generics are safe here: stub files are never
# executed, so syntax newer than the runtime is allowed.
def maybe_booleans_to_slice(
    mask: np.ndarray  # ndarray[uint8_t]
) -> slice | np.ndarray: ...  # np.ndarray[np.uint8]

def maybe_indices_to_slice(
    indices: np.ndarray,  # np.ndarray[np.intp]
    max_len: int,
) -> slice | np.ndarray: ...  # np.ndarray[np.uint8]

# Returns a (cleaned values, flag) pair; the bool's meaning is defined by
# the implementation.
def clean_index_list(obj: list) -> tuple[
    list | np.ndarray,  # np.ndarray[object] | np.ndarray[np.int64]
    bool,
]: ...


# -----------------------------------------------------------------
# Functions which in reality take memoryviews

# Total memory footprint of the objects in `arr`; int at the Python level
# (np.int64 in the implementation, per the trailing comment).
def memory_usage_of_objects(
    arr: np.ndarray  # object[:]
) -> int: ...  # np.int64


# Masked variant of `map_infer`; `mask` presumably marks NA positions filled
# with `na_value` — confirm against the implementation.  `= ...` defaults
# mean "has a default, value unspecified" in stub files.
# TODO: f: Callable?
# TODO: dtype -> DtypeObj?
def map_infer_mask(
    arr: np.ndarray,
    f: Callable[[Any], Any],
    mask: np.ndarray,  # const uint8_t[:]
    convert: bool = ...,
    na_value: Any = ...,
    dtype: Any = ...,
) -> ArrayLike: ...

# Builds a dict from group keys to positions (exact mapping defined by the
# implementation); trailing comments give the precise Cython element types.
def indices_fast(
    index: np.ndarray,  # ndarray[intp_t]
    labels: np.ndarray,  # const int64_t[:]
    keys: list,
    sorted_labels: list[np.ndarray],  # list[ndarray[np.int64]]
) -> dict: ...

# Integer-label bookkeeping helpers; the trailing comments record the exact
# Cython memoryview types the implementations accept.

# Returns a pair of int64 arrays (starts/ends per the name) for `ngroups`
# groups described by `labels`.
def generate_slices(
    labels: np.ndarray,  # const intp_t[:]
    ngroups: int
) -> tuple[
    np.ndarray,  # np.ndarray[np.int64]
    np.ndarray,  # np.ndarray[np.int64]
]: ...

# Counts per (label, bin) over a 2-D boolean mask along `axis`; returns a
# 2-D int64 ndarray.
def count_level_2d(
    mask: np.ndarray,  # ndarray[uint8_t, ndim=2, cast=True],
    labels: np.ndarray,  # const intp_t[:]
    max_bin: int,
    axis: int
) -> np.ndarray: ...  # np.ndarray[np.int64, ndim=2]

# Returns a 1-D intp sorter array for level labels given group `starts`.
def get_level_sorter(
    label: np.ndarray,  # const int64_t[:]
    starts: np.ndarray,  # const intp_t[:]
) -> np.ndarray: ...  # np.ndarray[np.intp, ndim=1]


# Bins int64-encoded datetime values against `binner` edges; `closed`
# selects which edge is inclusive (default "left").
def generate_bins_dt64(
    values: np.ndarray,  # np.ndarray[np.int64]
    binner: np.ndarray,  # const int64_t[:]
    closed: object = "left",
    hasnans: bool = False,
) -> np.ndarray: ...  # np.ndarray[np.int64, ndim=1]


# Equivalence check between two object arrays; returns a single bool.
def array_equivalent_object(
    left: np.ndarray,  # object[:]
    right: np.ndarray,  # object[:]
) -> bool: ...

# has_infs_f8 / has_infs_f4: per the names, True when the float64/float32
# array contains infinities — confirm in implementation.
def has_infs_f8(
    arr: np.ndarray  # const float64_t[:]
) -> bool: ...

def has_infs_f4(
    arr: np.ndarray  # const float32_t[:]
) -> bool: ...

# Inverts an intp indexer of the given `length`; returns an intp ndarray.
def get_reverse_indexer(
    indexer: np.ndarray,  # const intp_t[:]
    length: int,
) -> np.ndarray: ...  # np.ndarray[np.intp]
21 changes: 16 additions & 5 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@

import numpy as np

from pandas._libs.lib import no_default
from pandas._libs.lib import (
NoDefault,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably import from `pandas._typing`.

Alternatively, we could type `no_default` as `Any` in `pandas/_libs/lib.pyi` and avoid referencing it in the function signatures that are visible to end users, where it could be confusing or misleading. We should never pass `lib.no_default` explicitly unless we are passing it along to a lower-level function.

no_default,
)
from pandas._libs.missing import is_matching_na
import pandas._libs.testing as _testing

Expand Down Expand Up @@ -54,7 +57,7 @@ def assert_almost_equal(
left,
right,
check_dtype: Union[bool, str] = "equiv",
check_less_precise: Union[bool, int] = no_default,
check_less_precise: Union[bool, int, NoDefault] = no_default,
rtol: float = 1.0e-5,
atol: float = 1.0e-8,
**kwargs,
Expand Down Expand Up @@ -104,7 +107,11 @@ def assert_almost_equal(
FutureWarning,
stacklevel=2,
)
rtol = atol = _get_tol_from_less_precise(check_less_precise)
# error: Argument 1 to "_get_tol_from_less_precise" has incompatible
# type "Union[bool, int, NoDefault]"; expected "Union[bool, int]"
rtol = atol = _get_tol_from_less_precise(
check_less_precise # type: ignore[arg-type]
)

if isinstance(left, Index):
assert_index_equal(
Expand Down Expand Up @@ -242,7 +249,7 @@ def assert_index_equal(
right: Index,
exact: Union[bool, str] = "equiv",
check_names: bool = True,
check_less_precise: Union[bool, int] = no_default,
check_less_precise: Union[bool, int, NoDefault] = no_default,
check_exact: bool = True,
check_categorical: bool = True,
check_order: bool = True,
Expand Down Expand Up @@ -331,7 +338,11 @@ def _get_ilevel_values(index, level):
FutureWarning,
stacklevel=2,
)
rtol = atol = _get_tol_from_less_precise(check_less_precise)
# error: Argument 1 to "_get_tol_from_less_precise" has incompatible
# type "Union[bool, int, NoDefault]"; expected "Union[bool, int]"
rtol = atol = _get_tol_from_less_precise(
check_less_precise # type: ignore[arg-type]
)

# instance validation
_check_isinstance(left, right, Index)
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,16 @@ def apply_standard(self) -> FrameOrSeriesUnion:
mapped = obj._values.map(f)
else:
values = obj.astype(object)._values
mapped = lib.map_infer(values, f, convert=self.convert_dtype)
# error: Argument 2 to "map_infer" has incompatible type
# "Union[Callable[..., Any], str, List[Union[Callable[..., Any], str]],
# Dict[Hashable, Union[Union[Callable[..., Any], str],
# List[Union[Callable[..., Any], str]]]]]"; expected
# "Callable[[Any], Any]"
mapped = lib.map_infer(
values,
f, # type: ignore[arg-type]
convert=self.convert_dtype,
)

if len(mapped) and isinstance(mapped[0], ABCSeries):
# GH 25959 use pd.array instead of tolist
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,9 @@ def _box_values(self, values) -> np.ndarray:
"""
apply box func to passed values
"""
return lib.map_infer(values, self._box_func)
# error: Incompatible return value type (got
# "Union[ExtensionArray, ndarray]", expected "ndarray")
return lib.map_infer(values, self._box_func) # type: ignore[return-value]

def __iter__(self):
if self.ndim > 1:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,9 @@ def _str_map(self, f, na_value=None, dtype: Dtype | None = None):
if not na_value_is_na:
mask[:] = False

return constructor(result, mask)
# error: Argument 1 to "maybe_convert_objects" has incompatible
# type "Union[ExtensionArray, ndarray]"; expected "ndarray"
return constructor(result, mask) # type: ignore[arg-type]

elif is_string_dtype(dtype) and not is_object_dtype(dtype):
# i.e. StringDtype
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1194,7 +1194,8 @@ def _memory_usage(self, deep: bool = False) -> int:

v = self.array.nbytes
if deep and is_object_dtype(self) and not PYPY:
v += lib.memory_usage_of_objects(self._values)
values = cast(np.ndarray, self._values)
v += lib.memory_usage_of_objects(values)
return v

@doc(
Expand Down
15 changes: 10 additions & 5 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,18 +121,19 @@ def maybe_convert_platform(
values: list | tuple | range | np.ndarray | ExtensionArray,
) -> ArrayLike:
""" try to do platform conversion, allow ndarray or list here """
arr: ArrayLike

if isinstance(values, (list, tuple, range)):
arr = construct_1d_object_array_from_listlike(values)
else:
# The caller is responsible for ensuring that we have np.ndarray
# or ExtensionArray here.

# error: Incompatible types in assignment (expression has type "Union[ndarray,
# ExtensionArray]", variable has type "ndarray")
arr = values # type: ignore[assignment]
arr = values

if arr.dtype == object:
arr = lib.maybe_convert_objects(arr)
# error: Argument 1 to "maybe_convert_objects" has incompatible type
# "Union[ExtensionArray, ndarray]"; expected "ndarray"
arr = lib.maybe_convert_objects(arr) # type: ignore[arg-type]

return arr

Expand Down Expand Up @@ -1436,9 +1437,13 @@ def convert_dtypes(

Returns
-------
str, np.dtype, or ExtensionDtype
dtype
new dtype
"""
inferred_dtype: str | np.dtype | ExtensionDtype
# TODO: rule out str

if (
convert_string or convert_integer or convert_boolean or convert_floating
) and isinstance(input_array, np.ndarray):
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/dtypes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,9 @@ def is_inferred_bool_dtype(arr: ArrayLike) -> bool:
This does not include the special treatment is_bool_dtype uses for
Categorical.
"""
if not isinstance(arr, np.ndarray):
return False

dtype = arr.dtype
if dtype == np.dtype(bool):
return True
Expand Down
14 changes: 11 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2021,7 +2021,13 @@ def from_records(
if coerce_float:
for i, arr in enumerate(arrays):
if arr.dtype == object:
arrays[i] = lib.maybe_convert_objects(arr, try_float=True)
# error: Argument 1 to "maybe_convert_objects" has
# incompatible type "Union[ExtensionArray, ndarray]";
# expected "ndarray"
arrays[i] = lib.maybe_convert_objects(
arr, # type: ignore[arg-type]
try_float=True,
)

arr_columns = ensure_index(arr_columns)
if columns is None:
Expand Down Expand Up @@ -7388,7 +7394,7 @@ def groupby(
as_index: bool = True,
sort: bool = True,
group_keys: bool = True,
squeeze: bool = no_default,
squeeze: bool | lib.NoDefault = no_default,
observed: bool = False,
dropna: bool = True,
) -> DataFrameGroupBy:
Expand All @@ -7410,6 +7416,8 @@ def groupby(
raise TypeError("You have to supply one of 'by' and 'level'")
axis = self._get_axis_number(axis)

# error: Argument "squeeze" to "DataFrameGroupBy" has incompatible type
# "Union[bool, NoDefault]"; expected "bool"
return DataFrameGroupBy(
obj=self,
keys=by,
Expand All @@ -7418,7 +7426,7 @@ def groupby(
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
squeeze=squeeze, # type: ignore[arg-type]
observed=observed,
dropna=dropna,
)
Expand Down
Loading