Skip to content

Commit 3d815ee

Browse files
authored
TYP: SparseArray methods (#43539)
1 parent 6d5f8e5 commit 3d815ee

File tree

2 files changed

+35
-19
lines changed

2 files changed

+35
-19
lines changed

pandas/core/arrays/sparse/array.py

+28-18
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,14 @@ class ellipsis(Enum):
9797

9898
Ellipsis = ellipsis.Ellipsis
9999

100-
SparseIndexKind = Literal["integer", "block"]
100+
from scipy.sparse import spmatrix
101+
102+
from pandas._typing import (
103+
FillnaOptions,
104+
NumpySorter,
105+
)
101106

102-
from pandas._typing import NumpySorter
107+
SparseIndexKind = Literal["integer", "block"]
103108

104109
from pandas import Series
105110

@@ -141,7 +146,7 @@ def _get_fill(arr: SparseArray) -> np.ndarray:
141146

142147
def _sparse_array_op(
143148
left: SparseArray, right: SparseArray, op: Callable, name: str
144-
) -> Any:
149+
) -> SparseArray:
145150
"""
146151
Perform a binary operation between two arrays.
147152
@@ -229,7 +234,9 @@ def _sparse_array_op(
229234
return _wrap_result(name, result, index, fill, dtype=result_dtype)
230235

231236

232-
def _wrap_result(name, data, sparse_index, fill_value, dtype: Dtype | None = None):
237+
def _wrap_result(
238+
name: str, data, sparse_index, fill_value, dtype: Dtype | None = None
239+
) -> SparseArray:
233240
"""
234241
wrap op result to have correct dtype
235242
"""
@@ -500,7 +507,7 @@ def _simple_new(
500507
return new
501508

502509
@classmethod
503-
def from_spmatrix(cls, data):
510+
def from_spmatrix(cls: type[SparseArrayT], data: spmatrix) -> SparseArrayT:
504511
"""
505512
Create a SparseArray from a scipy.sparse matrix.
506513
@@ -690,7 +697,12 @@ def isna(self):
690697
dtype = SparseDtype(bool, self._null_fill_value)
691698
return type(self)._simple_new(isna(self.sp_values), self.sp_index, dtype)
692699

693-
def fillna(self, value=None, method=None, limit=None):
700+
def fillna(
701+
self: SparseArrayT,
702+
value=None,
703+
method: FillnaOptions | None = None,
704+
limit: int | None = None,
705+
) -> SparseArrayT:
694706
"""
695707
Fill missing values with `value`.
696708
@@ -745,7 +757,7 @@ def fillna(self, value=None, method=None, limit=None):
745757

746758
return self._simple_new(new_values, self._sparse_index, new_dtype)
747759

748-
def shift(self, periods: int = 1, fill_value=None):
760+
def shift(self: SparseArrayT, periods: int = 1, fill_value=None) -> SparseArrayT:
749761

750762
if not len(self) or periods == 0:
751763
return self.copy()
@@ -791,7 +803,7 @@ def _first_fill_value_loc(self):
791803
diff = indices[1:] - indices[:-1]
792804
return np.searchsorted(diff, 2) + 1
793805

794-
def unique(self):
806+
def unique(self: SparseArrayT) -> SparseArrayT:
795807
uniques = list(algos.unique(self.sp_values))
796808
fill_loc = self._first_fill_value_loc()
797809
if fill_loc >= 0:
@@ -802,17 +814,15 @@ def _values_for_factorize(self):
802814
# Still override this for hash_pandas_object
803815
return np.asarray(self), self.fill_value
804816

805-
def factorize(self, na_sentinel=-1):
817+
def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, SparseArray]:
806818
# Currently, ExtensionArray.factorize -> Tuple[ndarray, EA]
807819
# The sparsity on this is backwards from what Sparse would want. Want
808820
# ExtensionArray.factorize -> Tuple[EA, EA]
809821
# Given that we have to return a dense array of codes, why bother
810822
# implementing an efficient factorize?
811823
codes, uniques = algos.factorize(np.asarray(self), na_sentinel=na_sentinel)
812-
# error: Incompatible types in assignment (expression has type "SparseArray",
813-
# variable has type "Union[ndarray, Index]")
814-
uniques = SparseArray(uniques, dtype=self.dtype) # type: ignore[assignment]
815-
return codes, uniques
824+
uniques_sp = SparseArray(uniques, dtype=self.dtype)
825+
return codes, uniques_sp
816826

817827
def value_counts(self, dropna: bool = True) -> Series:
818828
"""
@@ -930,8 +940,8 @@ def _get_val_at(self, loc):
930940
return val
931941

932942
def take(
933-
self, indices, *, allow_fill: bool = False, fill_value=None
934-
) -> SparseArray:
943+
self: SparseArrayT, indices, *, allow_fill: bool = False, fill_value=None
944+
) -> SparseArrayT:
935945
if is_scalar(indices):
936946
raise ValueError(f"'indices' must be an array, not a scalar '{indices}'.")
937947
indices = np.asarray(indices, dtype=np.int32)
@@ -1222,7 +1232,7 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True):
12221232
sp_values, self.sp_index, dtype # type: ignore[arg-type]
12231233
)
12241234

1225-
def map(self, mapper):
1235+
def map(self: SparseArrayT, mapper) -> SparseArrayT:
12261236
"""
12271237
Map categories using input correspondence (dict, Series, or function).
12281238
@@ -1274,7 +1284,7 @@ def map(self, mapper):
12741284

12751285
return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_value)
12761286

1277-
def to_dense(self):
1287+
def to_dense(self) -> np.ndarray:
12781288
"""
12791289
Convert SparseArray to a NumPy array.
12801290
@@ -1407,7 +1417,7 @@ def sum(self, axis: int = 0, min_count: int = 0, *args, **kwargs) -> Scalar:
14071417
return na_value_for_dtype(self.dtype.subtype, compat=False)
14081418
return sp_sum + self.fill_value * nsparse
14091419

1410-
def cumsum(self, axis=0, *args, **kwargs):
1420+
def cumsum(self, axis: int = 0, *args, **kwargs) -> SparseArray:
14111421
"""
14121422
Cumulative sum of non-NA/null values.
14131423

pandas/core/dtypes/concat.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,14 @@ def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
5353
# problem case: SparseArray.astype(dtype) doesn't follow the specified
5454
# dtype exactly, but converts this to Sparse[dtype] -> first manually
5555
# convert to dense array
56+
57+
# error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
58+
# "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type,
59+
# _SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any,
60+
# Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict,
61+
# Tuple[Any, Any]]]" [arg-type]
5662
arr = cast(SparseArray, arr)
57-
return arr.to_dense().astype(dtype, copy=False)
63+
return arr.to_dense().astype(dtype, copy=False) # type: ignore[arg-type]
5864

5965
if (
6066
isinstance(arr, np.ndarray)

0 commit comments

Comments
 (0)