Skip to content

TYP: SparseArray methods #43539

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into the base branch from the contributor's branch
Sep 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 28 additions & 18 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,14 @@ class ellipsis(Enum):

Ellipsis = ellipsis.Ellipsis

SparseIndexKind = Literal["integer", "block"]
from scipy.sparse import spmatrix

from pandas._typing import (
FillnaOptions,
NumpySorter,
)

from pandas._typing import NumpySorter
SparseIndexKind = Literal["integer", "block"]

from pandas import Series

Expand Down Expand Up @@ -141,7 +146,7 @@ def _get_fill(arr: SparseArray) -> np.ndarray:

def _sparse_array_op(
left: SparseArray, right: SparseArray, op: Callable, name: str
) -> Any:
) -> SparseArray:
"""
Perform a binary operation between two arrays.

Expand Down Expand Up @@ -229,7 +234,9 @@ def _sparse_array_op(
return _wrap_result(name, result, index, fill, dtype=result_dtype)


def _wrap_result(name, data, sparse_index, fill_value, dtype: Dtype | None = None):
def _wrap_result(
name: str, data, sparse_index, fill_value, dtype: Dtype | None = None
) -> SparseArray:
"""
wrap op result to have correct dtype
"""
Expand Down Expand Up @@ -500,7 +507,7 @@ def _simple_new(
return new

@classmethod
def from_spmatrix(cls, data):
def from_spmatrix(cls: type[SparseArrayT], data: spmatrix) -> SparseArrayT:
"""
Create a SparseArray from a scipy.sparse matrix.

Expand Down Expand Up @@ -690,7 +697,12 @@ def isna(self):
dtype = SparseDtype(bool, self._null_fill_value)
return type(self)._simple_new(isna(self.sp_values), self.sp_index, dtype)

def fillna(self, value=None, method=None, limit=None):
def fillna(
self: SparseArrayT,
value=None,
method: FillnaOptions | None = None,
limit: int | None = None,
) -> SparseArrayT:
"""
Fill missing values with `value`.

Expand Down Expand Up @@ -745,7 +757,7 @@ def fillna(self, value=None, method=None, limit=None):

return self._simple_new(new_values, self._sparse_index, new_dtype)

def shift(self, periods: int = 1, fill_value=None):
def shift(self: SparseArrayT, periods: int = 1, fill_value=None) -> SparseArrayT:

if not len(self) or periods == 0:
return self.copy()
Expand Down Expand Up @@ -791,7 +803,7 @@ def _first_fill_value_loc(self):
diff = indices[1:] - indices[:-1]
return np.searchsorted(diff, 2) + 1

def unique(self):
def unique(self: SparseArrayT) -> SparseArrayT:
uniques = list(algos.unique(self.sp_values))
fill_loc = self._first_fill_value_loc()
if fill_loc >= 0:
Expand All @@ -802,17 +814,15 @@ def _values_for_factorize(self):
# Still override this for hash_pandas_object
return np.asarray(self), self.fill_value

def factorize(self, na_sentinel=-1):
def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, SparseArray]:
# Currently, ExtensionArray.factorize -> Tuple[ndarray, EA]
# The sparsity on this is backwards from what Sparse would want. Want
# ExtensionArray.factorize -> Tuple[EA, EA]
# Given that we have to return a dense array of codes, why bother
# implementing an efficient factorize?
codes, uniques = algos.factorize(np.asarray(self), na_sentinel=na_sentinel)
# error: Incompatible types in assignment (expression has type "SparseArray",
# variable has type "Union[ndarray, Index]")
uniques = SparseArray(uniques, dtype=self.dtype) # type: ignore[assignment]
return codes, uniques
uniques_sp = SparseArray(uniques, dtype=self.dtype)
return codes, uniques_sp

def value_counts(self, dropna: bool = True) -> Series:
"""
Expand Down Expand Up @@ -930,8 +940,8 @@ def _get_val_at(self, loc):
return val

def take(
self, indices, *, allow_fill: bool = False, fill_value=None
) -> SparseArray:
self: SparseArrayT, indices, *, allow_fill: bool = False, fill_value=None
) -> SparseArrayT:
if is_scalar(indices):
raise ValueError(f"'indices' must be an array, not a scalar '{indices}'.")
indices = np.asarray(indices, dtype=np.int32)
Expand Down Expand Up @@ -1222,7 +1232,7 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True):
sp_values, self.sp_index, dtype # type: ignore[arg-type]
)

def map(self, mapper):
def map(self: SparseArrayT, mapper) -> SparseArrayT:
"""
Map categories using input correspondence (dict, Series, or function).

Expand Down Expand Up @@ -1274,7 +1284,7 @@ def map(self, mapper):

return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_value)

def to_dense(self):
def to_dense(self) -> np.ndarray:
"""
Convert SparseArray to a NumPy array.

Expand Down Expand Up @@ -1407,7 +1417,7 @@ def sum(self, axis: int = 0, min_count: int = 0, *args, **kwargs) -> Scalar:
return na_value_for_dtype(self.dtype.subtype, compat=False)
return sp_sum + self.fill_value * nsparse

def cumsum(self, axis=0, *args, **kwargs):
def cumsum(self, axis: int = 0, *args, **kwargs) -> SparseArray:
"""
Cumulative sum of non-NA/null values.

Expand Down
8 changes: 7 additions & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,14 @@ def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
# problem case: SparseArray.astype(dtype) doesn't follow the specified
# dtype exactly, but converts this to Sparse[dtype] -> first manually
# convert to dense array

# error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
# "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type, _
# SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any,
# Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict,
# Tuple[Any, Any]]]" [arg-type]
arr = cast(SparseArray, arr)
return arr.to_dense().astype(dtype, copy=False)
return arr.to_dense().astype(dtype, copy=False) # type: ignore[arg-type]

if (
isinstance(arr, np.ndarray)
Expand Down