Skip to content

Backport PR #45244 on branch 1.4.x (TYP: Ignore numpy related issues) #45306

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into 1.4.x
Jan 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- conda-forge
dependencies:
# required
- numpy>=1.18.5, <1.22.0
- numpy>=1.18.5
- python=3.8
- python-dateutil>=2.8.1
- pytz
Expand Down
14 changes: 12 additions & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,12 @@ def factorize(
# na_value is set based on the dtype of uniques, and compat set to False is
# because we do not want na_value to be 0 for integers
na_value = na_value_for_dtype(uniques.dtype, compat=False)
uniques = np.append(uniques, [na_value])
# Argument 2 to "append" has incompatible type "List[Union[str, float, Period,
# Timestamp, Timedelta, Any]]"; expected "Union[_SupportsArray[dtype[Any]],
# _NestedSequence[_SupportsArray[dtype[Any]]]
# , bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int,
# float, complex, str, bytes]]]" [arg-type]
uniques = np.append(uniques, [na_value]) # type: ignore[arg-type]
codes = np.where(code_is_na, len(uniques) - 1, codes)

uniques = _reconstruct_data(uniques, dtype, original)
Expand Down Expand Up @@ -1069,7 +1074,12 @@ def checked_add_with_arr(
elif arr_mask is not None:
not_nan = np.logical_not(arr_mask)
elif b_mask is not None:
not_nan = np.logical_not(b2_mask)
# Argument 1 to "__call__" of "_UFunc_Nin1_Nout1" has incompatible type
# "Optional[ndarray[Any, dtype[bool_]]]"; expected
# "Union[_SupportsArray[dtype[Any]], _NestedSequence[_SupportsArray[dtype[An
# y]]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool,
# int, float, complex, str, bytes]]]" [arg-type]
not_nan = np.logical_not(b2_mask) # type: ignore[arg-type]
else:
not_nan = np.empty(arr.shape, dtype=bool)
not_nan.fill(True)
Expand Down
7 changes: 1 addition & 6 deletions pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,12 +265,7 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
return result

# Determine if we should defer.

# error: "Type[ndarray]" has no attribute "__array_ufunc__"
no_defer = (
np.ndarray.__array_ufunc__, # type: ignore[attr-defined]
cls.__array_ufunc__,
)
no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)

for item in inputs:
higher_priority = (
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1639,7 +1639,13 @@ def isin(self, values) -> np.ndarray:
# complex128 ndarray is much more performant.
left = self._combined.view("complex128")
right = values._combined.view("complex128")
return np.in1d(left, right)
# Argument 1 to "in1d" has incompatible type "Union[ExtensionArray,
# ndarray[Any, Any], ndarray[Any, dtype[Any]]]"; expected
# "Union[_SupportsArray[dtype[Any]], _NestedSequence[_SupportsArray[
# dtype[Any]]], bool, int, float, complex, str, bytes,
# _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
# [arg-type]
return np.in1d(left, right) # type: ignore[arg-type]

elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion(
values.left.dtype
Expand Down
12 changes: 10 additions & 2 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,11 @@ def any(self, *, skipna: bool = True, **kwargs):
nv.validate_any((), kwargs)

values = self._data.copy()
np.putmask(values, self._mask, self._falsey_value)
# Argument 3 to "putmask" has incompatible type "object"; expected
# "Union[_SupportsArray[dtype[Any]], _NestedSequence[
# _SupportsArray[dtype[Any]]], bool, int, float, complex, str, bytes, _Nested
# Sequence[Union[bool, int, float, complex, str, bytes]]]" [arg-type]
np.putmask(values, self._mask, self._falsey_value) # type: ignore[arg-type]
result = values.any()
if skipna:
return result
Expand Down Expand Up @@ -1019,7 +1023,11 @@ def all(self, *, skipna: bool = True, **kwargs):
nv.validate_all((), kwargs)

values = self._data.copy()
np.putmask(values, self._mask, self._truthy_value)
# Argument 3 to "putmask" has incompatible type "object"; expected
# "Union[_SupportsArray[dtype[Any]], _NestedSequence[
# _SupportsArray[dtype[Any]]], bool, int, float, complex, str, bytes, _Neste
# dSequence[Union[bool, int, float, complex, str, bytes]]]" [arg-type]
np.putmask(values, self._mask, self._truthy_value) # type: ignore[arg-type]
result = values.all()

if skipna:
Expand Down
20 changes: 16 additions & 4 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,8 @@ def fillna(
elif method is not None:
msg = "fillna with 'method' requires high memory usage."
warnings.warn(msg, PerformanceWarning)
new_values = np.asarray(self)
# Need type annotation for "new_values" [var-annotated]
new_values = np.asarray(self) # type: ignore[var-annotated]
# interpolate_2d modifies new_values inplace
interpolate_2d(new_values, method=method, limit=limit)
return type(self)(new_values, fill_value=self.fill_value)
Expand Down Expand Up @@ -924,7 +925,15 @@ def __getitem__(
if is_integer(key):
return self._get_val_at(key)
elif isinstance(key, tuple):
data_slice = self.to_dense()[key]
# Invalid index type "Tuple[Union[int, ellipsis], ...]" for
# "ndarray[Any, Any]"; expected type "Union[SupportsIndex,
# _SupportsArray[dtype[Union[bool_, integer[Any]]]], _NestedSequence[_Su
# pportsArray[dtype[Union[bool_, integer[Any]]]]],
# _NestedSequence[Union[bool, int]], Tuple[Union[SupportsIndex,
# _SupportsArray[dtype[Union[bool_, integer[Any]]]],
# _NestedSequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]], _N
# estedSequence[Union[bool, int]]], ...]]" [index]
data_slice = self.to_dense()[key] # type: ignore[index]
elif isinstance(key, slice):

# Avoid densifying when handling contiguous slices
Expand Down Expand Up @@ -1164,7 +1173,9 @@ def _concat_same_type(

data = np.concatenate(values)
indices_arr = np.concatenate(indices)
sp_index = IntIndex(length, indices_arr)
# Argument 2 to "IntIndex" has incompatible type "ndarray[Any,
# dtype[signedinteger[_32Bit]]]"; expected "Sequence[int]"
sp_index = IntIndex(length, indices_arr) # type: ignore[arg-type]

else:
# when concatenating block indices, we don't claim that you'll
Expand Down Expand Up @@ -1342,7 +1353,8 @@ def __setstate__(self, state):
if isinstance(state, tuple):
# Compat for pandas < 0.24.0
nd_state, (fill_value, sp_index) = state
sparse_values = np.array([])
# Need type annotation for "sparse_values" [var-annotated]
sparse_values = np.array([]) # type: ignore[var-annotated]
sparse_values.__setstate__(nd_state)

self._sparse_values = sparse_values
Expand Down
8 changes: 2 additions & 6 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1122,9 +1122,7 @@ def astype_nansafe(
return lib.ensure_string_array(arr, skipna=skipna, convert_na_value=False)

elif is_datetime64_dtype(arr.dtype):
# Non-overlapping equality check (left operand type: "dtype[Any]", right
# operand type: "Type[signedinteger[Any]]")
if dtype == np.int64: # type: ignore[comparison-overlap]
if dtype == np.int64:
warnings.warn(
f"casting {arr.dtype} values to int64 with .astype(...) "
"is deprecated and will raise in a future version. "
Expand All @@ -1143,9 +1141,7 @@ def astype_nansafe(
raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")

elif is_timedelta64_dtype(arr.dtype):
# error: Non-overlapping equality check (left operand type: "dtype[Any]", right
# operand type: "Type[signedinteger[Any]]")
if dtype == np.int64: # type: ignore[comparison-overlap]
if dtype == np.int64:
warnings.warn(
f"casting {arr.dtype} values to int64 with .astype(...) "
"is deprecated and will raise in a future version. "
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,9 +534,7 @@ def is_string_or_object_np_dtype(dtype: np.dtype) -> bool:
"""
Faster alternative to is_string_dtype, assumes we have a np.dtype object.
"""
# error: Non-overlapping equality check (left operand type: "dtype[Any]",
# right operand type: "Type[object]")
return dtype == object or dtype.kind in "SU" # type: ignore[comparison-overlap]
return dtype == object or dtype.kind in "SU"


def is_string_dtype(arr_or_dtype) -> bool:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2428,7 +2428,9 @@ def to_records(
if dtype_mapping is None:
formats.append(v.dtype)
elif isinstance(dtype_mapping, (type, np.dtype, str)):
formats.append(dtype_mapping)
# Argument 1 to "append" of "list" has incompatible type
# "Union[type, dtype[Any], str]"; expected "dtype[_SCT]" [arg-type]
formats.append(dtype_mapping) # type: ignore[arg-type]
else:
element = "row" if i < index_len else "column"
msg = f"Invalid dtype {dtype_mapping} specified for {element} {name}"
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6979,8 +6979,7 @@ def interpolate(
# create/use the index
if method == "linear":
# prior default
index = np.arange(len(obj.index))
index = Index(index)
index = Index(np.arange(len(obj.index)))
else:
index = obj.index
methods = {"index", "values", "nearest", "time"}
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,7 @@ def _get_cython_function(
f = getattr(libgroupby, ftype)
if is_numeric:
return f
# error: Non-overlapping equality check (left operand type: "dtype[Any]", right
# operand type: "Literal['object']")
elif dtype == object: # type: ignore[comparison-overlap]
elif dtype == object:
if "object" not in f.__signatures__:
# raise NotImplementedError here rather than TypeError later
raise NotImplementedError(
Expand Down Expand Up @@ -807,6 +805,7 @@ def size(self) -> Series:
Compute group sizes.
"""
ids, _, ngroups = self.group_info
out: np.ndarray | list
if ngroups:
out = np.bincount(ids[ids != -1], minlength=ngroups)
else:
Expand Down
16 changes: 13 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4534,7 +4534,12 @@ def _join_non_unique(
right = other._values.take(right_idx)

if isinstance(join_array, np.ndarray):
np.putmask(join_array, mask, right)
# Argument 3 to "putmask" has incompatible type "Union[ExtensionArray,
# ndarray[Any, Any]]"; expected "Union[_SupportsArray[dtype[Any]],
# _NestedSequence[_SupportsArray[dtype[Any]]], bool, int, f
# loat, complex, str, bytes, _NestedSequence[Union[bool, int, float,
# complex, str, bytes]]]" [arg-type]
np.putmask(join_array, mask, right) # type: ignore[arg-type]
else:
join_array._putmask(mask, right)

Expand Down Expand Up @@ -5034,9 +5039,11 @@ def __getitem__(self, key):
if result.ndim > 1:
deprecate_ndim_indexing(result)
if hasattr(result, "_ndarray"):
# error: Item "ndarray[Any, Any]" of "Union[ExtensionArray,
# ndarray[Any, Any]]" has no attribute "_ndarray" [union-attr]
# i.e. NDArrayBackedExtensionArray
# Unpack to ndarray for MPL compat
return result._ndarray
return result._ndarray # type: ignore[union-attr]
return result

# NB: Using _constructor._simple_new would break if MultiIndex
Expand Down Expand Up @@ -6523,6 +6530,7 @@ def delete(self: _IndexT, loc) -> _IndexT:
Index(['b'], dtype='object')
"""
values = self._values
res_values: ArrayLike
if isinstance(values, np.ndarray):
# TODO(__array_function__): special casing will be unnecessary
res_values = np.delete(values, loc)
Expand Down Expand Up @@ -6576,7 +6584,9 @@ def insert(self, loc: int, item) -> Index:
new_values = np.insert(arr, loc, casted)

else:
new_values = np.insert(arr, loc, None)
# No overload variant of "insert" matches argument types
# "ndarray[Any, Any]", "int", "None" [call-overload]
new_values = np.insert(arr, loc, None) # type: ignore[call-overload]
loc = loc if loc >= 0 else loc - 1
new_values[loc] = item

Expand Down
15 changes: 12 additions & 3 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,9 @@ def _validate_codes(self, level: list, code: list):
"""
null_mask = isna(level)
if np.any(null_mask):
code = np.where(null_mask[code], -1, code)
# Incompatible types in assignment (expression has type
# "ndarray[Any, dtype[Any]]", variable has type "List[Any]")
code = np.where(null_mask[code], -1, code) # type: ignore[assignment]
return code

def _verify_integrity(self, codes: list | None = None, levels: list | None = None):
Expand Down Expand Up @@ -1086,7 +1088,9 @@ def _engine(self):
# equivalent to sorting lexicographically the codes themselves. Notice
# that each level needs to be shifted by the number of bits needed to
# represent the _previous_ ones:
offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")
offsets = np.concatenate([lev_bits[1:], [0]]).astype( # type: ignore[arg-type]
"uint64"
)

# Check the total number of bits needed for our representation:
if lev_bits[0] > 64:
Expand Down Expand Up @@ -1564,7 +1568,12 @@ def is_monotonic_increasing(self) -> bool:
self._get_level_values(i)._values for i in reversed(range(len(self.levels)))
]
try:
sort_order = np.lexsort(values)
# Argument 1 to "lexsort" has incompatible type "List[Union[ExtensionArray,
# ndarray[Any, Any]]]"; expected "Union[_SupportsArray[dtype[Any]],
# _NestedSequence[_SupportsArray[dtype[Any]]], bool,
# int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float,
# complex, str, bytes]]]" [arg-type]
sort_order = np.lexsort(values) # type: ignore[arg-type]
return Index(sort_order).is_monotonic
except TypeError:

Expand Down
10 changes: 8 additions & 2 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2021,7 +2021,9 @@ def _ensure_iterable_column_indexer(self, column_indexer):
if is_integer(column_indexer):
ilocs = [column_indexer]
elif isinstance(column_indexer, slice):
ilocs = np.arange(len(self.obj.columns))[column_indexer]
ilocs = np.arange(len(self.obj.columns))[ # type: ignore[assignment]
column_indexer
]
elif isinstance(column_indexer, np.ndarray) and is_bool_dtype(
column_indexer.dtype
):
Expand Down Expand Up @@ -2083,7 +2085,11 @@ def ravel(i):
# single indexer
if len(indexer) > 1 and not multiindex_indexer:
len_indexer = len(indexer[1])
ser = np.tile(ser, len_indexer).reshape(len_indexer, -1).T
ser = (
np.tile(ser, len_indexer) # type: ignore[assignment]
.reshape(len_indexer, -1)
.T
)

return ser

Expand Down
8 changes: 7 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,13 @@ def delete(self, loc) -> None:
"""
Delete given loc(-s) from block in-place.
"""
self.values = np.delete(self.values, loc, 0)
# Argument 1 to "delete" has incompatible type "Union[ndarray[Any, Any],
# ExtensionArray]"; expected "Union[_SupportsArray[dtype[Any]],
# Sequence[_SupportsArray[dtype[Any]]], Sequence[Sequence
# [_SupportsArray[dtype[Any]]]], Sequence[Sequence[Sequence[
# _SupportsArray[dtype[Any]]]]], Sequence[Sequence[Sequence[Sequence[
# _SupportsArray[dtype[Any]]]]]]]" [arg-type]
self.values = np.delete(self.values, loc, 0) # type: ignore[arg-type]
self.mgr_locs = self._mgr_locs.delete(loc)
try:
self._cache.clear()
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1125,8 +1125,8 @@ def value_getitem(placement):
unfit_mgr_locs = []
unfit_val_locs = []
removed_blknos = []
for blkno, val_locs in libinternals.get_blkno_placements(blknos, group=True):
blk = self.blocks[blkno]
for blkno_l, val_locs in libinternals.get_blkno_placements(blknos, group=True):
blk = self.blocks[blkno_l]
blk_locs = blklocs[val_locs.indexer]
if inplace and blk.should_store(value):
blk.set_inplace(blk_locs, value_getitem(val_locs))
Expand All @@ -1136,7 +1136,7 @@ def value_getitem(placement):

# If all block items are unfit, schedule the block for removal.
if len(val_locs) == len(blk.mgr_locs):
removed_blknos.append(blkno)
removed_blknos.append(blkno_l)
else:
blk.delete(blk_locs)
self._blklocs[blk.mgr_locs.indexer] = np.arange(len(blk))
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/internals/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,7 @@ def _get_same_shape_values(
# argument type "Tuple[Union[ndarray, slice], slice]"
lvals = lvals[rblk.mgr_locs.indexer, :] # type: ignore[call-overload]
assert lvals.shape[0] == 1, lvals.shape
# error: No overload variant of "__getitem__" of "ExtensionArray" matches
# argument type "Tuple[int, slice]"
lvals = lvals[0, :] # type: ignore[call-overload]
lvals = lvals[0, :]
else:
# lvals are 1D, rvals are 2D
assert rvals.shape[0] == 1, rvals.shape
Expand Down
Loading