Skip to content

BUG: Summing a sparse boolean series raises TypeError #34220

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -853,7 +853,7 @@ Sparse
^^^^^^
- Creating a :class:`SparseArray` from timezone-aware dtype will issue a warning before dropping timezone information, instead of doing so silently (:issue:`32501`)
- Bug in :meth:`arrays.SparseArray.from_spmatrix` wrongly read scipy sparse matrix (:issue:`31991`)
-
- Bug in :meth:`Series.sum` with ``SparseArray`` raising ``TypeError`` (:issue:`25777`)

ExtensionArray
^^^^^^^^^^^^^^
Expand Down
21 changes: 19 additions & 2 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import pandas._libs.sparse as splib
from pandas._libs.sparse import BlockIndex, IntIndex, SparseIndex
from pandas._libs.tslibs import NaT
from pandas._typing import Scalar
import pandas.compat as compat
from pandas.compat.numpy import function as nv
from pandas.errors import PerformanceWarning
Expand Down Expand Up @@ -46,6 +47,7 @@
from pandas.core.construction import extract_array, sanitize_array
from pandas.core.indexers import check_array_indexer
from pandas.core.missing import interpolate_2d
from pandas.core.nanops import check_below_min_count
import pandas.core.ops as ops
from pandas.core.ops.common import unpack_zerodim_and_defer

Expand Down Expand Up @@ -1220,21 +1222,36 @@ def any(self, axis=0, *args, **kwargs):

return values.any().item()

def sum(self, axis: int = 0, min_count: int = 0, *args, **kwargs) -> Scalar:
    """
    Sum of non-NA/null values.

    Parameters
    ----------
    axis : int, default 0
        Not Used. NumPy compatibility.
    min_count : int, default 0
        The required number of valid values to perform the summation. If
        fewer than ``min_count`` valid values are present, the result will
        be the missing value indicator for the subarray type.
    *args, **kwargs
        Not Used. NumPy compatibility.

    Returns
    -------
    scalar
        The sum of the valid stored values plus the contribution of the
        fill value, or the missing value indicator for the subarray type
        when fewer than ``min_count`` valid values are present.
    """
    nv.validate_sum(args, kwargs)
    valid_vals = self._valid_sp_values
    sp_sum = valid_vals.sum()
    if self._null_fill_value:
        # Only the explicitly stored valid values are summed and counted
        # towards ``min_count`` in this branch.
        if check_below_min_count(valid_vals.shape, None, min_count):
            return na_value_for_dtype(self.dtype.subtype, compat=False)
        return sp_sum
    else:
        nsparse = self.sp_index.ngaps
        # Each gap contributes ``fill_value`` to the total below, so the
        # gaps are credited against ``min_count`` before checking the
        # stored values.
        if check_below_min_count(valid_vals.shape, None, min_count - nsparse):
            return na_value_for_dtype(self.dtype.subtype, compat=False)
        return sp_sum + self.fill_value * nsparse

def cumsum(self, axis=0, *args, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,9 @@ def na_value_for_dtype(dtype, compat: bool = True):
return 0
return np.nan
elif is_bool_dtype(dtype):
return False
if compat:
return False
return np.nan
return np.nan


Expand Down
2 changes: 1 addition & 1 deletion pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1283,7 +1283,7 @@ def _maybe_null_out(

def check_below_min_count(
shape: Tuple[int, ...], mask: Optional[np.ndarray], min_count: int
):
) -> bool:
"""
Check for the `min_count` keyword. Returns True if below `min_count` (when
missing value should be returned from the reduction).
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/arrays/sparse/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,6 +983,25 @@ def test_sum(self):
out = SparseArray(data, fill_value=np.nan).sum()
assert out == 40.0

@pytest.mark.parametrize(
    "arr",
    [
        np.array([0, 1, np.nan, 1]),
        np.array([0, 1, 1]),
        np.array([True, True, False]),
    ],
)
@pytest.mark.parametrize("fill_value", [0, 1, np.nan, True, False])
@pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)])
def test_sum_min_count(self, arr, fill_value, min_count, expected):
    """Check ``SparseArray.sum`` against ``min_count`` for each fill value."""
    # https://github.com/pandas-dev/pandas/issues/25777
    total = SparseArray(arr, fill_value=fill_value).sum(min_count=min_count)
    if not np.isnan(expected):
        assert total == expected
        return
    # NaN never compares equal to itself, so the missing-result case is
    # checked with ``np.isnan`` rather than ``==``.
    assert np.isnan(total)

def test_numpy_sum(self):
data = np.arange(10).astype(float)
out = np.sum(SparseArray(data))
Expand Down