Skip to content

TYP: lib.pyi #40772

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 200 additions & 0 deletions pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# TODO(npdtypes): Many types specified here can be made more specific/accurate;
# the more specific versions are specified in comments

from typing import (
Any,
Callable,
)

import numpy as np

from pandas._typing import ArrayLike

# Imports belong ahead of module-level assignments (PEP 8); the enum import
# previously sat below this alias.
from enum import Enum

# placeholder until we can specify np.ndarray[object, ndim=2]
ndarray_obj_2d = np.ndarray

# Enum used purely as the type of the `no_default` sentinel declared below.
class NoDefault(Enum):
    ...

# Declaration only: the sentinel value itself is provided by the
# implementation module this stub describes.
no_default: NoDefault


# Unwraps a value from a zero-dimensional container (per the name — see the
# implementation for exact semantics); accepts and returns arbitrary objects.
def item_from_zerodim(val: object) -> object: ...
# Returns a string naming the inferred dtype of `value`; `skipna` presumably
# controls whether NA values are ignored during inference — confirm in impl.
def infer_dtype(value: object, skipna: bool = True) -> str: ...

# Predicates on arbitrary Python objects; each returns a plain bool.
def is_iterator(obj: object) -> bool: ...
def is_scalar(val: object) -> bool: ...
def is_list_like(obj: object, allow_sets: bool = True) -> bool: ...

# Scalar-kind predicates (period / interval / decimal / complex / bool /
# integer / float).
def is_period(val: object) -> bool: ...
def is_interval(val: object) -> bool: ...
def is_decimal(val: object) -> bool: ...
def is_complex(val: object) -> bool: ...
def is_bool(val: object) -> bool: ...
def is_integer(val: object) -> bool: ...
def is_float(val: object) -> bool: ...

# Whole-array predicates: each takes an ndarray and returns a single bool.
def is_interval_array(values: np.ndarray) -> bool: ...
def is_period_array(values: np.ndarray) -> bool: ...
def is_datetime64_array(values: np.ndarray) -> bool: ...
def is_timedelta_or_timedelta64_array(values: np.ndarray) -> bool: ...
def is_datetime_with_singletz_array(values: np.ndarray) -> bool: ...

# Whole-array predicates with NA-skipping support.  These mirror the group
# above but accept `skipna`; the original stubs omitted the return type —
# annotated `-> bool` here to match the sibling predicates.
def is_time_array(values: np.ndarray, skipna: bool = False) -> bool: ...
def is_date_array(values: np.ndarray, skipna: bool = False) -> bool: ...
def is_datetime_array(values: np.ndarray, skipna: bool = False) -> bool: ...
def is_string_array(values: np.ndarray, skipna: bool = False) -> bool: ...
def is_float_array(values: np.ndarray, skipna: bool = False) -> bool: ...
def is_integer_array(values: np.ndarray, skipna: bool = False) -> bool: ...
def is_bool_array(values: np.ndarray, skipna: bool = False) -> bool: ...

def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> ArrayLike: ...

# Unique-preserving flatteners over a generator / list of lists / list of
# arrays; `sort` presumably sorts the output when possible.
# TODO: gen: Generator?
def fast_unique_multiple_list_gen(gen: object, sort: bool = True) -> list: ...
def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ...
def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ...

# Element-wise application of `f` over `arr` (per the name); `convert` and
# `ignore_na` tune result handling — see implementation.  Returns ArrayLike.
def map_infer(
    arr: np.ndarray, f: Callable[[Any], Any], convert: bool = True, ignore_na: bool = False
) -> ArrayLike: ...

# Best-effort conversion of an object ndarray to a more specific dtype;
# the `convert_*` flags (by name) opt in to datetime / timedelta /
# nullable-integer outputs.
def maybe_convert_objects(
    objects: np.ndarray,  # np.ndarray[object]
    try_float: bool = False,
    safe: bool = False,
    convert_datetime: bool = False,
    convert_timedelta: bool = False,
    convert_to_nullable_integer: bool = False,
) -> ArrayLike: ...

# Numeric coercion of an object ndarray; `na_values` is the set treated as
# missing, `coerce_numeric` presumably forces conversion — confirm in impl.
def maybe_convert_numeric(
    values: np.ndarray,  # np.ndarray[object]
    na_values: set,
    convert_empty: bool = True,
    coerce_numeric: bool = False,
) -> np.ndarray: ...

# Coerces `arr` to an object ndarray of strings, substituting `na_value`
# for missing entries (per the parameter names); `copy` controls whether
# the input may be returned uncopied.
# TODO: restrict `arr`?
def ensure_string_array(
    arr,
    na_value: object = np.nan,
    convert_na_value: bool = True,
    copy: bool = True,
    skipna: bool = True,
) -> np.ndarray: ...  # np.ndarray[object]

# Returns a string describing the datetime-like kind of an object array.
def infer_datetimelike_array(
    arr: np.ndarray  # np.ndarray[object]
) -> str: ...

# Casts an object ndarray to `new_dtype`; "intsafe" per the name — see the
# implementation for overflow semantics.
# TODO: new_dtype -> np.dtype?
def astype_intsafe(
    arr: np.ndarray,  # np.ndarray[object]
    new_dtype,
) -> np.ndarray: ...

# Object-ndarray constructors: zip a list of arrays, or build 2-D object
# arrays (`ndarray_obj_2d`) from rows / tuples / dicts.
def fast_zip(ndarrays: list) -> np.ndarray: ...  # np.ndarray[object]

# TODO: can we be more specific about rows?
def to_object_array_tuples(rows: object) -> ndarray_obj_2d: ...

def tuples_to_object_array(
    tuples: np.ndarray  # np.ndarray[object]
) -> ndarray_obj_2d: ...

# TODO: can we be more specific about rows?
def to_object_array(rows: object, min_width: int = 0) -> ndarray_obj_2d: ...

def dicts_to_array(dicts: list, columns: list) -> ndarray_obj_2d: ...


# Mask/index -> slice converters: return a slice when the input is
# contiguous-representable, otherwise the ndarray (per the union return).
# PEP 604 `X | Y` and builtin generics are safe here: stub files are never
# executed, so syntax newer than the runtime is allowed.
def maybe_booleans_to_slice(
    mask: np.ndarray  # ndarray[uint8_t]
) -> slice | np.ndarray: ...  # np.ndarray[np.uint8]

def maybe_indices_to_slice(
    indices: np.ndarray,  # np.ndarray[np.intp]
    max_len: int,
) -> slice | np.ndarray: ...  # np.ndarray[np.uint8]

# Returns a (cleaned values, flag) pair; the bool's meaning is defined by
# the implementation.
def clean_index_list(obj: list) -> tuple[
    list | np.ndarray,  # np.ndarray[object] | np.ndarray[np.int64]
    bool,
]: ...


# -----------------------------------------------------------------
# Functions which in reality take memoryviews

# Total memory footprint of the objects in `arr`; int at the Python level
# (np.int64 in the implementation, per the trailing comment).
def memory_usage_of_objects(
    arr: np.ndarray  # object[:]
) -> int: ...  # np.int64


# Masked variant of `map_infer`; `mask` presumably marks NA positions filled
# with `na_value` — confirm against the implementation.  `= ...` defaults
# mean "has a default, value unspecified" in stub files.
# TODO: f: Callable?
# TODO: dtype -> DtypeObj?
def map_infer_mask(
    arr: np.ndarray,
    f: Callable[[Any], Any],
    mask: np.ndarray,  # const uint8_t[:]
    convert: bool = ...,
    na_value: Any = ...,
    dtype: Any = ...,
) -> ArrayLike: ...

# Builds a dict from group keys to positions (exact mapping defined by the
# implementation); trailing comments give the precise Cython element types.
def indices_fast(
    index: np.ndarray,  # ndarray[intp_t]
    labels: np.ndarray,  # const int64_t[:]
    keys: list,
    sorted_labels: list[np.ndarray],  # list[ndarray[np.int64]]
) -> dict: ...

# Integer-label bookkeeping helpers; the trailing comments record the exact
# Cython memoryview types the implementations accept.

# Returns a pair of int64 arrays (starts/ends per the name) for `ngroups`
# groups described by `labels`.
def generate_slices(
    labels: np.ndarray,  # const intp_t[:]
    ngroups: int
) -> tuple[
    np.ndarray,  # np.ndarray[np.int64]
    np.ndarray,  # np.ndarray[np.int64]
]: ...

# Counts per (label, bin) over a 2-D boolean mask along `axis`; returns a
# 2-D int64 ndarray.
def count_level_2d(
    mask: np.ndarray,  # ndarray[uint8_t, ndim=2, cast=True],
    labels: np.ndarray,  # const intp_t[:]
    max_bin: int,
    axis: int
) -> np.ndarray: ...  # np.ndarray[np.int64, ndim=2]

# Returns a 1-D intp sorter array for level labels given group `starts`.
def get_level_sorter(
    label: np.ndarray,  # const int64_t[:]
    starts: np.ndarray,  # const intp_t[:]
) -> np.ndarray: ...  # np.ndarray[np.intp, ndim=1]


# Bins int64-encoded datetime values against `binner` edges; `closed`
# selects which edge is inclusive (default "left").
def generate_bins_dt64(
    values: np.ndarray,  # np.ndarray[np.int64]
    binner: np.ndarray,  # const int64_t[:]
    closed: object = "left",
    hasnans: bool = False,
) -> np.ndarray: ...  # np.ndarray[np.int64, ndim=1]


# Equivalence check between two object arrays; returns a single bool.
def array_equivalent_object(
    left: np.ndarray,  # object[:]
    right: np.ndarray,  # object[:]
) -> bool: ...

# has_infs_f8 / has_infs_f4: per the names, True when the float64/float32
# array contains infinities — confirm in implementation.
def has_infs_f8(
    arr: np.ndarray  # const float64_t[:]
) -> bool: ...

def has_infs_f4(
    arr: np.ndarray  # const float32_t[:]
) -> bool: ...

# Inverts an intp indexer of the given `length`; returns an intp ndarray.
def get_reverse_indexer(
    indexer: np.ndarray,  # const intp_t[:]
    length: int,
) -> np.ndarray: ...  # np.ndarray[np.intp]
21 changes: 16 additions & 5 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@

import numpy as np

from pandas._libs.lib import no_default
from pandas._libs.lib import (
NoDefault,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably import from `pandas._typing`.

Alternatively, we could type `no_default` as `Any` in `pandas/_libs/lib.pyi` and avoid referencing it in the function signatures that are visible to end users, where it could be confusing or misleading. We should never pass `lib.no_default` explicitly unless we are passing it along to a lower-level function.

no_default,
)
from pandas._libs.missing import is_matching_na
import pandas._libs.testing as _testing

Expand Down Expand Up @@ -54,7 +57,7 @@ def assert_almost_equal(
left,
right,
check_dtype: Union[bool, str] = "equiv",
check_less_precise: Union[bool, int] = no_default,
check_less_precise: Union[bool, int, NoDefault] = no_default,
rtol: float = 1.0e-5,
atol: float = 1.0e-8,
**kwargs,
Expand Down Expand Up @@ -104,7 +107,11 @@ def assert_almost_equal(
FutureWarning,
stacklevel=2,
)
rtol = atol = _get_tol_from_less_precise(check_less_precise)
# error: Argument 1 to "_get_tol_from_less_precise" has incompatible
# type "Union[bool, int, NoDefault]"; expected "Union[bool, int]"
rtol = atol = _get_tol_from_less_precise(
check_less_precise # type: ignore[arg-type]
)

if isinstance(left, Index):
assert_index_equal(
Expand Down Expand Up @@ -242,7 +249,7 @@ def assert_index_equal(
right: Index,
exact: Union[bool, str] = "equiv",
check_names: bool = True,
check_less_precise: Union[bool, int] = no_default,
check_less_precise: Union[bool, int, NoDefault] = no_default,
check_exact: bool = True,
check_categorical: bool = True,
check_order: bool = True,
Expand Down Expand Up @@ -331,7 +338,11 @@ def _get_ilevel_values(index, level):
FutureWarning,
stacklevel=2,
)
rtol = atol = _get_tol_from_less_precise(check_less_precise)
# error: Argument 1 to "_get_tol_from_less_precise" has incompatible
# type "Union[bool, int, NoDefault]"; expected "Union[bool, int]"
rtol = atol = _get_tol_from_less_precise(
check_less_precise # type: ignore[arg-type]
)

# instance validation
_check_isinstance(left, right, Index)
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,16 @@ def apply_standard(self) -> FrameOrSeriesUnion:
mapped = obj._values.map(f)
else:
values = obj.astype(object)._values
mapped = lib.map_infer(values, f, convert=self.convert_dtype)
# error: Argument 2 to "map_infer" has incompatible type
# "Union[Callable[..., Any], str, List[Union[Callable[..., Any], str]],
# Dict[Hashable, Union[Union[Callable[..., Any], str],
# List[Union[Callable[..., Any], str]]]]]"; expected
# "Callable[[Any], Any]"
mapped = lib.map_infer(
values,
f, # type: ignore[arg-type]
convert=self.convert_dtype,
)

if len(mapped) and isinstance(mapped[0], ABCSeries):
# GH 25959 use pd.array instead of tolist
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,9 @@ def _box_values(self, values) -> np.ndarray:
"""
apply box func to passed values
"""
return lib.map_infer(values, self._box_func)
# error: Incompatible return value type (got
# "Union[ExtensionArray, ndarray]", expected "ndarray")
return lib.map_infer(values, self._box_func) # type: ignore[return-value]

def __iter__(self):
if self.ndim > 1:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,9 @@ def _str_map(self, f, na_value=None, dtype: Dtype | None = None):
if not na_value_is_na:
mask[:] = False

return constructor(result, mask)
# error: Argument 1 to "maybe_convert_objects" has incompatible
# type "Union[ExtensionArray, ndarray]"; expected "ndarray"
return constructor(result, mask) # type: ignore[arg-type]

elif is_string_dtype(dtype) and not is_object_dtype(dtype):
# i.e. StringDtype
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1194,7 +1194,8 @@ def _memory_usage(self, deep: bool = False) -> int:

v = self.array.nbytes
if deep and is_object_dtype(self) and not PYPY:
v += lib.memory_usage_of_objects(self._values)
values = cast(np.ndarray, self._values)
v += lib.memory_usage_of_objects(values)
return v

@doc(
Expand Down
15 changes: 10 additions & 5 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,18 +121,19 @@ def maybe_convert_platform(
values: list | tuple | range | np.ndarray | ExtensionArray,
) -> ArrayLike:
""" try to do platform conversion, allow ndarray or list here """
arr: ArrayLike

if isinstance(values, (list, tuple, range)):
arr = construct_1d_object_array_from_listlike(values)
else:
# The caller is responsible for ensuring that we have np.ndarray
# or ExtensionArray here.

# error: Incompatible types in assignment (expression has type "Union[ndarray,
# ExtensionArray]", variable has type "ndarray")
arr = values # type: ignore[assignment]
arr = values

if arr.dtype == object:
arr = lib.maybe_convert_objects(arr)
# error: Argument 1 to "maybe_convert_objects" has incompatible type
# "Union[ExtensionArray, ndarray]"; expected "ndarray"
arr = lib.maybe_convert_objects(arr) # type: ignore[arg-type]

return arr

Expand Down Expand Up @@ -1436,9 +1437,13 @@ def convert_dtypes(

Returns
-------
str, np.dtype, or ExtensionDtype
dtype
new dtype
"""
inferred_dtype: str | np.dtype | ExtensionDtype
# TODO: rule out str

if (
convert_string or convert_integer or convert_boolean or convert_floating
) and isinstance(input_array, np.ndarray):
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/dtypes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,9 @@ def is_inferred_bool_dtype(arr: ArrayLike) -> bool:
This does not include the special treatment is_bool_dtype uses for
Categorical.
"""
if not isinstance(arr, np.ndarray):
return False

dtype = arr.dtype
if dtype == np.dtype(bool):
return True
Expand Down
14 changes: 11 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2021,7 +2021,13 @@ def from_records(
if coerce_float:
for i, arr in enumerate(arrays):
if arr.dtype == object:
arrays[i] = lib.maybe_convert_objects(arr, try_float=True)
# error: Argument 1 to "maybe_convert_objects" has
# incompatible type "Union[ExtensionArray, ndarray]";
# expected "ndarray"
arrays[i] = lib.maybe_convert_objects(
arr, # type: ignore[arg-type]
try_float=True,
)

arr_columns = ensure_index(arr_columns)
if columns is None:
Expand Down Expand Up @@ -7388,7 +7394,7 @@ def groupby(
as_index: bool = True,
sort: bool = True,
group_keys: bool = True,
squeeze: bool = no_default,
squeeze: bool | lib.NoDefault = no_default,
observed: bool = False,
dropna: bool = True,
) -> DataFrameGroupBy:
Expand All @@ -7410,6 +7416,8 @@ def groupby(
raise TypeError("You have to supply one of 'by' and 'level'")
axis = self._get_axis_number(axis)

# error: Argument "squeeze" to "DataFrameGroupBy" has incompatible type
# "Union[bool, NoDefault]"; expected "bool"
return DataFrameGroupBy(
obj=self,
keys=by,
Expand All @@ -7418,7 +7426,7 @@ def groupby(
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
squeeze=squeeze, # type: ignore[arg-type]
observed=observed,
dropna=dropna,
)
Expand Down
Loading