pandas-dev · jreback · Sep 30, 2021 · Sep 30, 2021 · Sep 30, 2021 · jreback
diff --git a/asv_bench/benchmarks/indexing_engines.py b/asv_bench/benchmarks/indexing_engines.py
@@ -1,5 +1,5 @@
 """
-Benchmarks in this fiel depend exclusively on code in _libs/
+Benchmarks in this file depend exclusively on code in _libs/
 
 If a PR does not edit anything in _libs, it is very unlikely that benchmarks
 in this file will be affected.

diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi
@@ -6,9 +6,9 @@ from pandas._typing import npt
 
 def group_median_float64(
     out: np.ndarray,  # ndarray[float64_t, ndim=2]
-    counts: np.ndarray,  # ndarray[int64_t]
+    counts: npt.NDArray[np.int64],
     values: np.ndarray,  # ndarray[float64_t, ndim=2]
-    labels: np.ndarray,  # ndarray[int64_t]
+    labels: npt.NDArray[np.int64],
     min_count: int = ...,  # Py_ssize_t
 ) -> None: ...
 def group_cumprod_float64(
@@ -37,7 +37,7 @@ def group_fillna_indexer(
     out: np.ndarray,  # ndarray[intp_t]
     labels: np.ndarray,  # ndarray[int64_t]
     sorted_labels: npt.NDArray[np.intp],
-    mask: np.ndarray,  # ndarray[uint8_t]
+    mask: npt.NDArray[np.uint8],
     direction: Literal["ffill", "bfill"],
     limit: int,  # int64_t
     dropna: bool,

diff --git a/pandas/_libs/hashtable.pyi b/pandas/_libs/hashtable.pyi
@@ -5,6 +5,8 @@ from typing import (
 
 import numpy as np
 
+from pandas._typing import npt
+
 def unique_label_indices(
     labels: np.ndarray,  # const int64_t[:]
 ) -> np.ndarray: ...
@@ -19,11 +21,11 @@ class ObjectFactorizer(Factorizer):
     uniques: ObjectVector
     def factorize(
         self,
-        values: np.ndarray,  # ndarray[object]
+        values: npt.NDArray[np.object_],
         sort: bool = ...,
         na_sentinel=...,
         na_value=...,
-    ) -> np.ndarray: ...  # np.ndarray[intp]
+    ) -> npt.NDArray[np.intp]: ...
 
 class Int64Factorizer(Factorizer):
     table: Int64HashTable
@@ -34,77 +36,77 @@ class Int64Factorizer(Factorizer):
         sort: bool = ...,
         na_sentinel=...,
         na_value=...,
-    ) -> np.ndarray: ...  # np.ndarray[intp]
+    ) -> npt.NDArray[np.intp]: ...
 
 class Int64Vector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.int64]
+    def to_array(self) -> npt.NDArray[np.int64]: ...
 
 class Int32Vector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.int32]
+    def to_array(self) -> npt.NDArray[np.int32]: ...
 
 class Int16Vector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.int16]
+    def to_array(self) -> npt.NDArray[np.int16]: ...
 
 class Int8Vector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.int8]
+    def to_array(self) -> npt.NDArray[np.int8]: ...
 
 class UInt64Vector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.uint64]
+    def to_array(self) -> npt.NDArray[np.uint64]: ...
 
 class UInt32Vector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.uint32]
+    def to_array(self) -> npt.NDArray[np.uint32]: ...
 
 class UInt16Vector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.uint16]
+    def to_array(self) -> npt.NDArray[np.uint16]: ...
 
 class UInt8Vector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.uint8]
+    def to_array(self) -> npt.NDArray[np.uint8]: ...
 
 class Float64Vector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.float64]
+    def to_array(self) -> npt.NDArray[np.float64]: ...
 
 class Float32Vector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.float32]
+    def to_array(self) -> npt.NDArray[np.float32]: ...
 
 class Complex128Vector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.complex128]
+    def to_array(self) -> npt.NDArray[np.complex128]: ...
 
 class Complex64Vector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[np.complex64]
+    def to_array(self) -> npt.NDArray[np.complex64]: ...
 
 class StringVector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[object]
+    def to_array(self) -> npt.NDArray[np.object_]: ...
 
 class ObjectVector:
     def __init__(self): ...
     def __len__(self) -> int: ...
-    def to_array(self) -> np.ndarray: ...  # np.ndarray[object]
+    def to_array(self) -> npt.NDArray[np.object_]: ...
 
 class HashTable:
     # NB: The base HashTable class does _not_ actually have these methods;
@@ -131,22 +133,22 @@ class HashTable:
     def lookup(
         self,
         values: np.ndarray,  # np.ndarray[subclass-specific]
-    ) -> np.ndarray: ...  # np.ndarray[np.intp]
+    ) -> npt.NDArray[np.intp]: ...
     def get_labels(
         self,
         values: np.ndarray,  # np.ndarray[subclass-specific]
         uniques,  # SubclassTypeVector
         count_prior: int = ...,
         na_sentinel: int = ...,
         na_value: object = ...,
-    ) -> np.ndarray: ...  # np.ndarray[intp_t]
+    ) -> npt.NDArray[np.intp]: ...
     def unique(
         self,
         values: np.ndarray,  # np.ndarray[subclass-specific]
         return_inverse: bool = ...,
     ) -> tuple[
         np.ndarray,  # np.ndarray[subclass-specific]
-        np.ndarray,  # np.ndarray[np.intp],
+        npt.NDArray[np.intp],
     ] | np.ndarray: ...  # np.ndarray[subclass-specific]
     def _unique(
         self,
@@ -159,18 +161,15 @@ class HashTable:
         return_inverse: bool = ...,
     ) -> tuple[
         np.ndarray,  # np.ndarray[subclass-specific]
-        np.ndarray,  # np.ndarray[np.intp],
+        npt.NDArray[np.intp],
     ] | np.ndarray: ...  # np.ndarray[subclass-specific]
     def factorize(
         self,
         values: np.ndarray,  # np.ndarray[subclass-specific]
         na_sentinel: int = ...,
         na_value: object = ...,
         mask=...,
-    ) -> tuple[
-        np.ndarray,  # np.ndarray[subclass-specific]
-        np.ndarray,  # np.ndarray[np.intp],
-    ]: ...
+    ) -> tuple[np.ndarray, npt.NDArray[np.intp],]: ...  # [0]: subclass-specific dtype
 
 class Complex128HashTable(HashTable): ...
 class Complex64HashTable(HashTable): ...
@@ -182,10 +181,7 @@ class Int64HashTable(HashTable):
     def get_labels_groupby(
         self,
         values: np.ndarray,  # const int64_t[:]
-    ) -> tuple[
-        np.ndarray,  # np.ndarray[np.intp]
-        np.ndarray,  # np.ndarray[np.int64]
-    ]: ...
+    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64],]: ...
 
 class Int32HashTable(HashTable): ...
 class Int16HashTable(HashTable): ...
@@ -200,32 +196,32 @@ class PyObjectHashTable(HashTable): ...
 def duplicated_int64(
     values: np.ndarray,  # const int64_t[:] values
     keep: Literal["last", "first", False] = ...,
-) -> np.ndarray: ...  # np.ndarray[bool]
+) -> npt.NDArray[np.bool_]: ...
 
 # TODO: Is it actually bool or is it uint8?
 
 def mode_int64(
     values: np.ndarray,  # const int64_t[:] values
     dropna: bool,
-) -> np.ndarray: ...  # np.ndarray[np.int64]
+) -> npt.NDArray[np.int64]: ...
 def value_count_int64(
     values: np.ndarray,  # const int64_t[:]
     dropna: bool,
-) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.int64]  # np.ndarray[np.int64]
+) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
 def duplicated(
     values: np.ndarray,
     keep: Literal["last", "first", False] = ...,
-) -> np.ndarray: ...  # np.ndarray[bool]
+) -> npt.NDArray[np.bool_]: ...
 def mode(values: np.ndarray, dropna: bool) -> np.ndarray: ...
 def value_count(
     values: np.ndarray,
     dropna: bool,
-) -> tuple[np.ndarray, np.ndarray,]: ...  # np.ndarray[np.int64]
+) -> tuple[np.ndarray, npt.NDArray[np.int64],]: ...  # [0]: same dtype as values
 
 # arr and values should have same dtype
 def ismember(
     arr: np.ndarray,
     values: np.ndarray,
-) -> np.ndarray: ...  # np.ndarray[bool]
+) -> npt.NDArray[np.bool_]: ...
 def object_hash(obj) -> int: ...
 def objects_are_equal(a, b) -> bool: ...
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -298,7 +298,6 @@ cdef class IndexEngine:
             Py_ssize_t i, j, n, n_t, n_alloc
             bint d_has_nan = False, stargets_has_nan = False, need_nan_check = True
 
-        self._ensure_mapping_populated()
         values = self.values
         stargets = set(targets)
 
@@ -740,7 +739,6 @@ cdef class BaseMultiIndexCodesEngine:
         return self._base.get_loc(self, lab_int)
 
     def get_indexer_non_unique(self, target: np.ndarray) -> np.ndarray:
-        # target: MultiIndex
         indexer = self._base.get_indexer_non_unique(self, target)
 
         return indexer

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -28,6 +28,7 @@
     DtypeObj,
     F,
     Shape,
+    npt,
 )
 from pandas.util._decorators import cache_readonly
 from pandas.util._validators import validate_bool_kwarg
@@ -1278,7 +1279,13 @@ def where(self, other, cond, errors="raise") -> list[Block]:
 
         return result_blocks
 
-    def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
+    def _unstack(
+        self,
+        unstacker,
+        fill_value,
+        new_placement: npt.NDArray[np.intp],
+        allow_fill: bool,
+    ):
         """
         Return a list of unstacked blocks of self
 
@@ -1668,7 +1675,13 @@ def where(self, other, cond, errors="raise") -> list[Block]:
 
         return [self.make_block_same_class(result)]
 
-    def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
+    def _unstack(
+        self,
+        unstacker,
+        fill_value,
+        new_placement: npt.NDArray[np.intp],
+        allow_fill: bool,
+    ):
         # ExtensionArray-safe unstack.
         # We override ObjectBlock._unstack, which unstacks directly on the
         # values of the array. For EA-backed blocks, this would require

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -2172,10 +2172,16 @@ def _factorize_keys(
 
     rizer = klass(max(len(lk), len(rk)))
 
-    llab = rizer.factorize(lk)
-    rlab = rizer.factorize(rk)
-    assert llab.dtype == np.intp, llab.dtype
-    assert rlab.dtype == np.intp, rlab.dtype
+    # Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type
+    # "Union[ndarray[Any, dtype[signedinteger[_64Bit]]],
+    # ndarray[Any, dtype[object_]]]"; expected "ndarray[Any, dtype[object_]]"
+    llab = rizer.factorize(lk)  # type: ignore[arg-type]
+    # Argument 1 to "factorize" of "ObjectFactorizer" has incompatible type
+    # "Union[ndarray[Any, dtype[signedinteger[_64Bit]]],
+    # ndarray[Any, dtype[object_]]]"; expected "ndarray[Any, dtype[object_]]"
+    rlab = rizer.factorize(rk)  # type: ignore[arg-type]
+    assert llab.dtype == np.dtype(np.intp), llab.dtype
+    assert rlab.dtype == np.dtype(np.intp), rlab.dtype
 
     count = rizer.get_count()
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -449,7 +449,9 @@ def __init__(
         self.name = name
         self._set_axis(0, index, fastpath=True)
 
-    def _init_dict(self, data, index=None, dtype: Dtype | None = None):
+    def _init_dict(
+        self, data, index: Index | None = None, dtype: DtypeObj | None = None
+    ):
         """
         Derive the "_mgr" and "index" attributes of a new Series from a
         dictionary input.
@@ -458,16 +460,18 @@ def _init_dict(self, data, index=None, dtype: Dtype | None = None):
         ----------
         data : dict or dict-like
             Data used to populate the new Series.
-        index : Index or index-like, default None
+        index : Index or None, default None
             Index for the new Series: if None, use dict keys.
-        dtype : dtype, default None
+        dtype : np.dtype, ExtensionDtype, or None, default None
             The dtype for the new Series: if None, infer from data.
 
         Returns
         -------
         _data : BlockManager for the new Series
         index : index for the new Series
         """
+        keys: Index | tuple
+
         # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
         # raises KeyError), so we iterate the entire dict, and align
         if data:

diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
@@ -32,6 +32,7 @@ def df():
 # TODO(ArrayManager) dask is still accessing the blocks
 # https://github.com/dask/dask/pull/7318
 @td.skip_array_manager_not_yet_implemented
+@pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning")
 def test_dask(df):
 
     toolz = import_module("toolz")  # noqa
@@ -92,6 +93,7 @@ def test_oo_optimized_datetime_index_unpickle():
 # Cython import warning
 @pytest.mark.filterwarnings("ignore:pandas.util.testing is deprecated")
 @pytest.mark.filterwarnings("ignore:can't:ImportWarning")
+@pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning")
 @pytest.mark.filterwarnings(
     # patsy needs to update their imports
     "ignore:Using or importing the ABCs from 'collections:DeprecationWarning"