Skip to content

Commit e71052c

Browse files
BUG: Summing a sparse boolean series raises TypeError (#34220)
1 parent 965bd59 commit e71052c

File tree

5 files changed

+43
-5
lines changed

5 files changed

+43
-5
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -853,7 +853,7 @@ Sparse
853853
^^^^^^
854854
- Creating a :class:`SparseArray` from timezone-aware dtype will issue a warning before dropping timezone information, instead of doing so silently (:issue:`32501`)
855855
- Bug in :meth:`arrays.SparseArray.from_spmatrix` wrongly read scipy sparse matrix (:issue:`31991`)
856-
-
856+
- Bug in :meth:`Series.sum` with ``SparseArray`` raising a ``TypeError`` (:issue:`25777`)
857857

858858
ExtensionArray
859859
^^^^^^^^^^^^^^

pandas/core/arrays/sparse/array.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import pandas._libs.sparse as splib
1414
from pandas._libs.sparse import BlockIndex, IntIndex, SparseIndex
1515
from pandas._libs.tslibs import NaT
16+
from pandas._typing import Scalar
1617
import pandas.compat as compat
1718
from pandas.compat.numpy import function as nv
1819
from pandas.errors import PerformanceWarning
@@ -46,6 +47,7 @@
4647
from pandas.core.construction import extract_array, sanitize_array
4748
from pandas.core.indexers import check_array_indexer
4849
from pandas.core.missing import interpolate_2d
50+
from pandas.core.nanops import check_below_min_count
4951
import pandas.core.ops as ops
5052
from pandas.core.ops.common import unpack_zerodim_and_defer
5153

@@ -1220,21 +1222,36 @@ def any(self, axis=0, *args, **kwargs):
12201222

12211223
return values.any().item()
12221224

1223-
def sum(self, axis=0, *args, **kwargs):
1225+
def sum(self, axis: int = 0, min_count: int = 0, *args, **kwargs) -> Scalar:
12241226
"""
12251227
Sum of non-NA/null values
12261228
1229+
Parameters
1230+
----------
1231+
axis : int, default 0
1232+
Not Used. NumPy compatibility.
1233+
min_count : int, default 0
1234+
The required number of valid values to perform the summation. If fewer
1235+
than ``min_count`` valid values are present, the result will be the missing
1236+
value indicator for the subarray type.
1237+
*args, **kwargs
1238+
Not Used. NumPy compatibility.
1239+
12271240
Returns
12281241
-------
1229-
sum : float
1242+
scalar
12301243
"""
12311244
nv.validate_sum(args, kwargs)
12321245
valid_vals = self._valid_sp_values
12331246
sp_sum = valid_vals.sum()
12341247
if self._null_fill_value:
1248+
if check_below_min_count(valid_vals.shape, None, min_count):
1249+
return na_value_for_dtype(self.dtype.subtype, compat=False)
12351250
return sp_sum
12361251
else:
12371252
nsparse = self.sp_index.ngaps
1253+
if check_below_min_count(valid_vals.shape, None, min_count - nsparse):
1254+
return na_value_for_dtype(self.dtype.subtype, compat=False)
12381255
return sp_sum + self.fill_value * nsparse
12391256

12401257
def cumsum(self, axis=0, *args, **kwargs):

pandas/core/dtypes/missing.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,9 @@ def na_value_for_dtype(dtype, compat: bool = True):
520520
return 0
521521
return np.nan
522522
elif is_bool_dtype(dtype):
523-
return False
523+
if compat:
524+
return False
525+
return np.nan
524526
return np.nan
525527

526528

pandas/core/nanops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1283,7 +1283,7 @@ def _maybe_null_out(
12831283

12841284
def check_below_min_count(
12851285
shape: Tuple[int, ...], mask: Optional[np.ndarray], min_count: int
1286-
):
1286+
) -> bool:
12871287
"""
12881288
Check for the `min_count` keyword. Returns True if below `min_count` (when
12891289
missing value should be returned from the reduction).

pandas/tests/arrays/sparse/test_array.py

+19
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,25 @@ def test_sum(self):
983983
out = SparseArray(data, fill_value=np.nan).sum()
984984
assert out == 40.0
985985

986+
@pytest.mark.parametrize(
987+
"arr",
988+
[
989+
np.array([0, 1, np.nan, 1]),
990+
np.array([0, 1, 1]),
991+
np.array([True, True, False]),
992+
],
993+
)
994+
@pytest.mark.parametrize("fill_value", [0, 1, np.nan, True, False])
995+
@pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)])
996+
def test_sum_min_count(self, arr, fill_value, min_count, expected):
997+
# https://github.com/pandas-dev/pandas/issues/25777
998+
sparray = SparseArray(arr, fill_value=fill_value)
999+
result = sparray.sum(min_count=min_count)
1000+
if np.isnan(expected):
1001+
assert np.isnan(result)
1002+
else:
1003+
assert result == expected
1004+
9861005
def test_numpy_sum(self):
9871006
data = np.arange(10).astype(float)
9881007
out = np.sum(SparseArray(data))

0 commit comments

Comments
 (0)