TYP: SparseArray methods (#43539)

mzeitlin11 · web-flow · commit 3d815ee14093 · 2021-09-13T11:18:09.000-04:00
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -97,9 +97,14 @@ class ellipsis(Enum):
 
     Ellipsis = ellipsis.Ellipsis
 
-    SparseIndexKind = Literal["integer", "block"]
+    from scipy.sparse import spmatrix
+
+    from pandas._typing import (
+        FillnaOptions,
+        NumpySorter,
+    )
 
-    from pandas._typing import NumpySorter
+    SparseIndexKind = Literal["integer", "block"]
 
     from pandas import Series
 
@@ -141,7 +146,7 @@ def _get_fill(arr: SparseArray) -> np.ndarray:
 
 def _sparse_array_op(
     left: SparseArray, right: SparseArray, op: Callable, name: str
-) -> Any:
+) -> SparseArray:
     """
     Perform a binary operation between two arrays.
 
@@ -229,7 +234,9 @@ def _sparse_array_op(
     return _wrap_result(name, result, index, fill, dtype=result_dtype)
 
 
-def _wrap_result(name, data, sparse_index, fill_value, dtype: Dtype | None = None):
+def _wrap_result(
+    name: str, data, sparse_index, fill_value, dtype: Dtype | None = None
+) -> SparseArray:
     """
     wrap op result to have correct dtype
     """
@@ -500,7 +507,7 @@ def _simple_new(
         return new
 
     @classmethod
-    def from_spmatrix(cls, data):
+    def from_spmatrix(cls: type[SparseArrayT], data: spmatrix) -> SparseArrayT:
         """
         Create a SparseArray from a scipy.sparse matrix.
 
@@ -690,7 +697,12 @@ def isna(self):
         dtype = SparseDtype(bool, self._null_fill_value)
         return type(self)._simple_new(isna(self.sp_values), self.sp_index, dtype)
 
-    def fillna(self, value=None, method=None, limit=None):
+    def fillna(
+        self: SparseArrayT,
+        value=None,
+        method: FillnaOptions | None = None,
+        limit: int | None = None,
+    ) -> SparseArrayT:
         """
         Fill missing values with `value`.
 
@@ -745,7 +757,7 @@ def fillna(self, value=None, method=None, limit=None):
 
         return self._simple_new(new_values, self._sparse_index, new_dtype)
 
-    def shift(self, periods: int = 1, fill_value=None):
+    def shift(self: SparseArrayT, periods: int = 1, fill_value=None) -> SparseArrayT:
 
         if not len(self) or periods == 0:
             return self.copy()
@@ -791,7 +803,7 @@ def _first_fill_value_loc(self):
         diff = indices[1:] - indices[:-1]
         return np.searchsorted(diff, 2) + 1
 
-    def unique(self):
+    def unique(self: SparseArrayT) -> SparseArrayT:
         uniques = list(algos.unique(self.sp_values))
         fill_loc = self._first_fill_value_loc()
         if fill_loc >= 0:
@@ -802,17 +814,15 @@ def _values_for_factorize(self):
         # Still override this for hash_pandas_object
         return np.asarray(self), self.fill_value
 
-    def factorize(self, na_sentinel=-1):
+    def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, SparseArray]:
         # Currently, ExtensionArray.factorize -> Tuple[ndarray, EA]
         # The sparsity on this is backwards from what Sparse would want. Want
         # ExtensionArray.factorize -> Tuple[EA, EA]
         # Given that we have to return a dense array of codes, why bother
         # implementing an efficient factorize?
         codes, uniques = algos.factorize(np.asarray(self), na_sentinel=na_sentinel)
-        # error: Incompatible types in assignment (expression has type "SparseArray",
-        # variable has type "Union[ndarray, Index]")
-        uniques = SparseArray(uniques, dtype=self.dtype)  # type: ignore[assignment]
-        return codes, uniques
+        uniques_sp = SparseArray(uniques, dtype=self.dtype)
+        return codes, uniques_sp
 
     def value_counts(self, dropna: bool = True) -> Series:
         """
@@ -930,8 +940,8 @@ def _get_val_at(self, loc):
             return val
 
     def take(
-        self, indices, *, allow_fill: bool = False, fill_value=None
-    ) -> SparseArray:
+        self: SparseArrayT, indices, *, allow_fill: bool = False, fill_value=None
+    ) -> SparseArrayT:
         if is_scalar(indices):
             raise ValueError(f"'indices' must be an array, not a scalar '{indices}'.")
         indices = np.asarray(indices, dtype=np.int32)
@@ -1222,7 +1232,7 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True):
             sp_values, self.sp_index, dtype  # type: ignore[arg-type]
         )
 
-    def map(self, mapper):
+    def map(self: SparseArrayT, mapper) -> SparseArrayT:
         """
         Map categories using input correspondence (dict, Series, or function).
 
@@ -1274,7 +1284,7 @@ def map(self, mapper):
 
         return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_value)
 
-    def to_dense(self):
+    def to_dense(self) -> np.ndarray:
         """
         Convert SparseArray to a NumPy array.
 
@@ -1407,7 +1417,7 @@ def sum(self, axis: int = 0, min_count: int = 0, *args, **kwargs) -> Scalar:
                 return na_value_for_dtype(self.dtype.subtype, compat=False)
             return sp_sum + self.fill_value * nsparse
 
-    def cumsum(self, axis=0, *args, **kwargs):
+    def cumsum(self, axis: int = 0, *args, **kwargs) -> SparseArray:
         """
         Cumulative sum of non-NA/null values.
 
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -53,8 +53,14 @@ def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
         # problem case: SparseArray.astype(dtype) doesn't follow the specified
         # dtype exactly, but converts this to Sparse[dtype] -> first manually
         # convert to dense array
+
+        # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
+        # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type, _
+        # SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any,
+        # Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict,
+        # Tuple[Any, Any]]]"  [arg-type]
         arr = cast(SparseArray, arr)
-        return arr.to_dense().astype(dtype, copy=False)
+        return arr.to_dense().astype(dtype, copy=False)  # type: ignore[arg-type]
 
     if (
         isinstance(arr, np.ndarray)