From a862273a970202fc5bc56efad10a99d7fcfa7db0 Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin
Date: Sun, 12 Sep 2021 15:03:53 -0400
Subject: [PATCH] TYP: SparseArray methods

---
 pandas/core/arrays/sparse/array.py | 44 ++++++++++++++++++------------
 pandas/core/dtypes/concat.py       |  8 +++++-
 2 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 6ae216cd3263c..dad6a3d082963 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -97,7 +97,12 @@ class ellipsis(Enum):

     Ellipsis = ellipsis.Ellipsis

-    from pandas._typing import NumpySorter
+    from scipy.sparse import spmatrix
+
+    from pandas._typing import (
+        FillnaOptions,
+        NumpySorter,
+    )

     from pandas import Series

@@ -139,7 +144,7 @@ def _get_fill(arr: SparseArray) -> np.ndarray:

 def _sparse_array_op(
     left: SparseArray, right: SparseArray, op: Callable, name: str
-) -> Any:
+) -> SparseArray:
     """
     Perform a binary operation between two arrays.

@@ -227,7 +232,9 @@ def _sparse_array_op(
     return _wrap_result(name, result, index, fill, dtype=result_dtype)


-def _wrap_result(name, data, sparse_index, fill_value, dtype: Dtype | None = None):
+def _wrap_result(
+    name: str, data, sparse_index, fill_value, dtype: Dtype | None = None
+) -> SparseArray:
     """
     wrap op result to have correct dtype
     """
@@ -498,7 +505,7 @@ def _simple_new(
         return new

     @classmethod
-    def from_spmatrix(cls, data):
+    def from_spmatrix(cls: type[SparseArrayT], data: spmatrix) -> SparseArrayT:
         """
         Create a SparseArray from a scipy.sparse matrix.

@@ -688,7 +695,12 @@ def isna(self):
         dtype = SparseDtype(bool, self._null_fill_value)
         return type(self)._simple_new(isna(self.sp_values), self.sp_index, dtype)

-    def fillna(self, value=None, method=None, limit=None):
+    def fillna(
+        self: SparseArrayT,
+        value=None,
+        method: FillnaOptions | None = None,
+        limit: int | None = None,
+    ) -> SparseArrayT:
         """
         Fill missing values with `value`.

@@ -743,7 +755,7 @@ def fillna(self, value=None, method=None, limit=None):

         return self._simple_new(new_values, self._sparse_index, new_dtype)

-    def shift(self, periods: int = 1, fill_value=None):
+    def shift(self: SparseArrayT, periods: int = 1, fill_value=None) -> SparseArrayT:
         if not len(self) or periods == 0:
             return self.copy()

@@ -789,7 +801,7 @@ def _first_fill_value_loc(self):
         diff = indices[1:] - indices[:-1]
         return np.searchsorted(diff, 2) + 1

-    def unique(self):
+    def unique(self: SparseArrayT) -> SparseArrayT:
         uniques = list(algos.unique(self.sp_values))
         fill_loc = self._first_fill_value_loc()
         if fill_loc >= 0:
@@ -800,17 +812,15 @@ def _values_for_factorize(self):
         # Still override this for hash_pandas_object
         return np.asarray(self), self.fill_value

-    def factorize(self, na_sentinel=-1):
+    def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, SparseArray]:
         # Currently, ExtensionArray.factorize -> Tuple[ndarray, EA]
         # The sparsity on this is backwards from what Sparse would want. Want
         # ExtensionArray.factorize -> Tuple[EA, EA]
         # Given that we have to return a dense array of codes, why bother
         # implementing an efficient factorize?
         codes, uniques = algos.factorize(np.asarray(self), na_sentinel=na_sentinel)
-        # error: Incompatible types in assignment (expression has type "SparseArray",
-        # variable has type "Union[ndarray, Index]")
-        uniques = SparseArray(uniques, dtype=self.dtype)  # type: ignore[assignment]
-        return codes, uniques
+        uniques_sp = SparseArray(uniques, dtype=self.dtype)
+        return codes, uniques_sp

     def value_counts(self, dropna: bool = True) -> Series:
         """
@@ -928,8 +938,8 @@ def _get_val_at(self, loc):
         return val

     def take(
-        self, indices, *, allow_fill: bool = False, fill_value=None
-    ) -> SparseArray:
+        self: SparseArrayT, indices, *, allow_fill: bool = False, fill_value=None
+    ) -> SparseArrayT:
         if is_scalar(indices):
             raise ValueError(f"'indices' must be an array, not a scalar '{indices}'.")
         indices = np.asarray(indices, dtype=np.int32)
@@ -1220,7 +1230,7 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True):
             sp_values, self.sp_index, dtype  # type: ignore[arg-type]
         )

-    def map(self, mapper):
+    def map(self: SparseArrayT, mapper) -> SparseArrayT:
         """
         Map categories using input correspondence (dict, Series, or function).

@@ -1272,7 +1282,7 @@ def map(self, mapper):

         return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_value)

-    def to_dense(self):
+    def to_dense(self) -> np.ndarray:
         """
         Convert SparseArray to a NumPy array.

@@ -1405,7 +1415,7 @@ def sum(self, axis: int = 0, min_count: int = 0, *args, **kwargs) -> Scalar:
             return na_value_for_dtype(self.dtype.subtype, compat=False)
         return sp_sum + self.fill_value * nsparse

-    def cumsum(self, axis=0, *args, **kwargs):
+    def cumsum(self, axis: int = 0, *args, **kwargs) -> SparseArray:
         """
         Cumulative sum of non-NA/null values.

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index b0d00775bbed1..c7fce9fff3631 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -53,8 +53,14 @@ def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
         # problem case: SparseArray.astype(dtype) doesn't follow the specified
         # dtype exactly, but converts this to Sparse[dtype] -> first manually
         # convert to dense array
+
+        # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
+        # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type, _
+        # SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any,
+        # Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict,
+        # Tuple[Any, Any]]]" [arg-type]
         arr = cast(SparseArray, arr)
-        return arr.to_dense().astype(dtype, copy=False)
+        return arr.to_dense().astype(dtype, copy=False)  # type: ignore[arg-type]

     if (
         isinstance(arr, np.ndarray)
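
For review convenience, a minimal runtime sketch of the API surface these annotations describe. It assumes the patch is applied on top of pandas with scipy installed; the variable names below are illustrative and not part of the change:

    import numpy as np
    import scipy.sparse

    from pandas.arrays import SparseArray

    # from_spmatrix is now typed (type[SparseArrayT], spmatrix) -> SparseArrayT;
    # per its docstring it expects a scipy.sparse matrix whose second dimension is 1
    mat = scipy.sparse.csc_matrix(np.array([[1.0], [0.0], [2.0]]))
    sp = SparseArray.from_spmatrix(mat)

    # fillna/shift/unique/take/map are annotated to return the caller's own subclass
    filled = SparseArray([1.0, np.nan, 2.0]).fillna(value=0.0)

    # factorize returns dense codes plus sparse uniques, matching the new
    # tuple[np.ndarray, SparseArray] annotation
    codes, uniques = sp.factorize()

    # to_dense is annotated as returning np.ndarray
    dense = sp.to_dense()

The concat.py hunk only silences a mypy arg-type error: `dtype` there is `Union[np.dtype, ExtensionDtype]`, while the numpy stubs for `ndarray.astype` do not accept `ExtensionDtype`, hence the `# type: ignore[arg-type]` with the full error message recorded in a comment above it.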