Skip to content

Commit b2af5ee

Browse files
committed
API: Return sparse objects always for cumsum
Always return SparseArray and SparseSeries for SparseArray.cumsum() and SparseSeries.cumsum() respectively, regardless of fill_value. Close gh-12855.
1 parent 1b0333b commit b2af5ee

File tree

5 files changed

+73
-52
lines changed

5 files changed

+73
-52
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ Backwards incompatible API changes
6060

6161

6262
- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
63+
- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
6364

6465

6566

pandas/sparse/array.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -620,19 +620,28 @@ def sum(self, axis=0, *args, **kwargs):
620620

621621
def cumsum(self, axis=0, *args, **kwargs):
622622
"""
623-
Cumulative sum of values. Preserves locations of NaN values
623+
Cumulative sum of non-NA/null values.
624+
625+
When performing the cumulative summation, any NA/null values will
626+
be skipped. The resulting SparseArray will preserve the locations of
627+
NaN values, but the fill value will be `np.nan` regardless.
628+
629+
Parameters
630+
----------
631+
axis : int
632+
Axis over which to perform the cumulative summation. Currently,
633+
this parameter is ignored because `SparseArray` only works with
634+
1-D array-like objects.
624635
625636
Returns
626637
-------
627-
cumsum : Series
638+
cumsum : SparseArray
628639
"""
629640
nv.validate_cumsum(args, kwargs)
630641

631-
# TODO: gh-12855 - return a SparseArray here
632-
if notnull(self.fill_value):
633-
return self.to_dense().cumsum()
642+
if not self._null_fill_value:
643+
return SparseArray(self.to_dense()).cumsum()
634644

635-
# TODO: what if sp_values contains NaN??
636645
return SparseArray(self.sp_values.cumsum(), sparse_index=self.sp_index,
637646
fill_value=self.fill_value)
638647

pandas/sparse/series.py

+19-10
Original file line numberDiff line numberDiff line change
@@ -630,21 +630,30 @@ def take(self, indices, axis=0, convert=True, *args, **kwargs):
630630

631631
def cumsum(self, axis=0, *args, **kwargs):
632632
"""
633-
Cumulative sum of values. Preserves locations of NaN values
633+
Cumulative sum of non-NA/null values.
634+
635+
When performing the cumulative summation, any NA/null values will
636+
be skipped. The resulting SparseSeries will preserve the locations of
637+
NaN values, but the fill value will be `np.nan` regardless.
638+
639+
Parameters
640+
----------
641+
axis : int
642+
Axis over which to perform the cumulative summation. This
643+
parameter is ignored because `SparseSeries` is 1-D, but it
644+
is kept in the signature for consistency with other sparse
645+
array-like objects.
634646
635647
Returns
636648
-------
637-
cumsum : SparseSeries if `self` has a null `fill_value` and a
638-
generic Series otherwise
649+
cumsum : SparseSeries
639650
"""
640651
nv.validate_cumsum(args, kwargs)
641-
new_array = SparseArray.cumsum(self.values)
642-
if isinstance(new_array, SparseArray):
643-
return self._constructor(
644-
new_array, index=self.index,
645-
sparse_index=new_array.sp_index).__finalize__(self)
646-
# TODO: gh-12855 - return a SparseSeries here
647-
return Series(new_array, index=self.index).__finalize__(self)
652+
new_array = self.values.cumsum()
653+
654+
return self._constructor(
655+
new_array, index=self.index,
656+
sparse_index=new_array.sp_index).__finalize__(self)
648657

649658
@Appender(generic._shared_docs['isnull'])
650659
def isnull(self):

pandas/sparse/tests/test_array.py

+38-32
Original file line numberDiff line numberDiff line change
@@ -688,46 +688,52 @@ def test_numpy_sum(self):
688688
SparseArray(data), out=out)
689689

690690
def test_cumsum(self):
691-
data = np.arange(10).astype(float)
692-
out = SparseArray(data).cumsum()
693-
expected = SparseArray(data.cumsum())
694-
tm.assert_sp_array_equal(out, expected)
691+
non_null_data = np.array([1, 2, 3, 4, 5], dtype=float)
692+
non_null_expected = SparseArray(non_null_data.cumsum())
695693

696-
# TODO: gh-12855 - return a SparseArray here
697-
data[5] = np.nan
698-
out = SparseArray(data, fill_value=2).cumsum()
699-
self.assertNotIsInstance(out, SparseArray)
700-
tm.assert_numpy_array_equal(out, data.cumsum())
694+
null_data = np.array([1, 2, np.nan, 4, 5], dtype=float)
695+
null_expected = SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))
696+
697+
for data, expected in [
698+
(null_data, null_expected),
699+
(non_null_data, non_null_expected)
700+
]:
701+
out = SparseArray(data).cumsum()
702+
tm.assert_sp_array_equal(out, expected)
701703

702-
out = SparseArray(data, fill_value=np.nan).cumsum()
703-
expected = SparseArray(np.array([
704-
0, 1, 3, 6, 10, np.nan, 16, 23, 31, 40]))
705-
tm.assert_sp_array_equal(out, expected)
704+
out = SparseArray(data, fill_value=np.nan).cumsum()
705+
tm.assert_sp_array_equal(out, expected)
706+
707+
out = SparseArray(data, fill_value=2).cumsum()
708+
tm.assert_sp_array_equal(out, expected)
706709

707710
def test_numpy_cumsum(self):
708-
data = np.arange(10).astype(float)
709-
out = np.cumsum(SparseArray(data))
710-
expected = SparseArray(data.cumsum())
711-
tm.assert_sp_array_equal(out, expected)
711+
non_null_data = np.array([1, 2, 3, 4, 5], dtype=float)
712+
non_null_expected = SparseArray(non_null_data.cumsum())
712713

713-
# TODO: gh-12855 - return a SparseArray here
714-
data[5] = np.nan
715-
out = np.cumsum(SparseArray(data, fill_value=2))
716-
self.assertNotIsInstance(out, SparseArray)
717-
tm.assert_numpy_array_equal(out, data.cumsum())
714+
null_data = np.array([1, 2, np.nan, 4, 5], dtype=float)
715+
null_expected = SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))
718716

719-
out = np.cumsum(SparseArray(data, fill_value=np.nan))
720-
expected = SparseArray(np.array([
721-
0, 1, 3, 6, 10, np.nan, 16, 23, 31, 40]))
722-
tm.assert_sp_array_equal(out, expected)
717+
for data, expected in [
718+
(null_data, null_expected),
719+
(non_null_data, non_null_expected)
720+
]:
721+
out = np.cumsum(SparseArray(data))
722+
tm.assert_sp_array_equal(out, expected)
723723

724-
msg = "the 'dtype' parameter is not supported"
725-
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
726-
SparseArray(data), dtype=np.int64)
724+
out = np.cumsum(SparseArray(data, fill_value=np.nan))
725+
tm.assert_sp_array_equal(out, expected)
727726

728-
msg = "the 'out' parameter is not supported"
729-
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
730-
SparseArray(data), out=out)
727+
out = np.cumsum(SparseArray(data, fill_value=2))
728+
tm.assert_sp_array_equal(out, expected)
729+
730+
msg = "the 'dtype' parameter is not supported"
731+
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
732+
SparseArray(data), dtype=np.int64)
733+
734+
msg = "the 'out' parameter is not supported"
735+
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
736+
SparseArray(data), out=out)
731737

732738
def test_mean(self):
733739
data = np.arange(10).astype(float)

pandas/sparse/tests/test_series.py

-4
Original file line numberDiff line numberDiff line change
@@ -1331,21 +1331,17 @@ def test_cumsum(self):
13311331
expected = SparseSeries(self.bseries.to_dense().cumsum())
13321332
tm.assert_sp_series_equal(result, expected)
13331333

1334-
# TODO: gh-12855 - return a SparseSeries here
13351334
result = self.zbseries.cumsum()
13361335
expected = self.zbseries.to_dense().cumsum()
1337-
self.assertNotIsInstance(result, SparseSeries)
13381336
tm.assert_series_equal(result, expected)
13391337

13401338
def test_numpy_cumsum(self):
13411339
result = np.cumsum(self.bseries)
13421340
expected = SparseSeries(self.bseries.to_dense().cumsum())
13431341
tm.assert_sp_series_equal(result, expected)
13441342

1345-
# TODO: gh-12855 - return a SparseSeries here
13461343
result = np.cumsum(self.zbseries)
13471344
expected = self.zbseries.to_dense().cumsum()
1348-
self.assertNotIsInstance(result, SparseSeries)
13491345
tm.assert_series_equal(result, expected)
13501346

13511347
msg = "the 'dtype' parameter is not supported"

0 commit comments

Comments
 (0)