Skip to content

Commit a472432

Browse files
committed
API: Return sparse objects always for cumsum
Always return SparseArray and SparseSeries for SparseArray.cumsum() and SparseSeries.cumsum() respectively, regardless of fill_value. Close gh-12855.
1 parent 51f725f commit a472432

File tree

5 files changed

+84
-52
lines changed

5 files changed

+84
-52
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ Backwards incompatible API changes
6363

6464

6565
- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
66+
- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
6667

6768

6869

pandas/sparse/array.py

+17-6
Original file line numberDiff line numberDiff line change
@@ -620,19 +620,30 @@ def sum(self, axis=0, *args, **kwargs):
620620

621621
def cumsum(self, axis=0, *args, **kwargs):
622622
"""
623-
Cumulative sum of values. Preserves locations of NaN values
623+
Cumulative sum of non-NA/null values.
624+
625+
When performing the cumulative summation, any NA/null values will
626+
be skipped. The resulting SparseArray will preserve the locations of
627+
NaN values, but the fill value will be `np.nan` regardless.
628+
629+
Parameters
630+
----------
631+
axis : int or None
632+
Axis over which to perform the cumulative summation. If None,
633+
perform cumulative summation over flattened array.
624634
625635
Returns
626636
-------
627-
cumsum : Series
637+
cumsum : SparseArray
628638
"""
629639
nv.validate_cumsum(args, kwargs)
630640

631-
# TODO: gh-12855 - return a SparseArray here
632-
if notnull(self.fill_value):
633-
return self.to_dense().cumsum()
641+
if axis is not None and axis >= self.ndim: # Mimic ndarray behaviour.
642+
raise ValueError("axis(={axis}) out of bounds".format(axis=axis))
643+
644+
if not self._null_fill_value:
645+
return SparseArray(self.to_dense()).cumsum()
634646

635-
# TODO: what if sp_values contains NaN??
636647
return SparseArray(self.sp_values.cumsum(), sparse_index=self.sp_index,
637648
fill_value=self.fill_value)
638649

pandas/sparse/series.py

+18-10
Original file line numberDiff line numberDiff line change
@@ -630,21 +630,29 @@ def take(self, indices, axis=0, convert=True, *args, **kwargs):
630630

631631
def cumsum(self, axis=0, *args, **kwargs):
632632
"""
633-
Cumulative sum of values. Preserves locations of NaN values
633+
Cumulative sum of non-NA/null values.
634+
635+
When performing the cumulative summation, any NA/null values will
636+
be skipped. The resulting SparseSeries will preserve the locations of
637+
NaN values, but the fill value will be `np.nan` regardless.
638+
639+
Parameters
640+
----------
641+
axis : 0 or None
634642
635643
Returns
636644
-------
637-
cumsum : SparseSeries if `self` has a null `fill_value` and a
638-
generic Series otherwise
645+
cumsum : SparseSeries
639646
"""
640647
nv.validate_cumsum(args, kwargs)
641-
new_array = SparseArray.cumsum(self.values)
642-
if isinstance(new_array, SparseArray):
643-
return self._constructor(
644-
new_array, index=self.index,
645-
sparse_index=new_array.sp_index).__finalize__(self)
646-
# TODO: gh-12855 - return a SparseSeries here
647-
return Series(new_array, index=self.index).__finalize__(self)
648+
if axis is not None:
649+
self._get_axis_number(axis) # Unused but should be valid!
650+
651+
new_array = self.values.cumsum()
652+
653+
return self._constructor(
654+
new_array, index=self.index,
655+
sparse_index=new_array.sp_index).__finalize__(self)
648656

649657
@Appender(generic._shared_docs['isnull'])
650658
def isnull(self):

pandas/sparse/tests/test_array.py

+43-32
Original file line numberDiff line numberDiff line change
@@ -688,46 +688,57 @@ def test_numpy_sum(self):
688688
SparseArray(data), out=out)
689689

690690
def test_cumsum(self):
691-
data = np.arange(10).astype(float)
692-
out = SparseArray(data).cumsum()
693-
expected = SparseArray(data.cumsum())
694-
tm.assert_sp_array_equal(out, expected)
691+
non_null_data = np.array([1, 2, 3, 4, 5], dtype=float)
692+
non_null_expected = SparseArray(non_null_data.cumsum())
695693

696-
# TODO: gh-12855 - return a SparseArray here
697-
data[5] = np.nan
698-
out = SparseArray(data, fill_value=2).cumsum()
699-
self.assertNotIsInstance(out, SparseArray)
700-
tm.assert_numpy_array_equal(out, data.cumsum())
694+
null_data = np.array([1, 2, np.nan, 4, 5], dtype=float)
695+
null_expected = SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))
696+
697+
for data, expected in [
698+
(null_data, null_expected),
699+
(non_null_data, non_null_expected)
700+
]:
701+
out = SparseArray(data).cumsum()
702+
tm.assert_sp_array_equal(out, expected)
703+
704+
out = SparseArray(data, fill_value=np.nan).cumsum()
705+
tm.assert_sp_array_equal(out, expected)
701706

702-
out = SparseArray(data, fill_value=np.nan).cumsum()
703-
expected = SparseArray(np.array([
704-
0, 1, 3, 6, 10, np.nan, 16, 23, 31, 40]))
705-
tm.assert_sp_array_equal(out, expected)
707+
out = SparseArray(data, fill_value=2).cumsum()
708+
tm.assert_sp_array_equal(out, expected)
709+
710+
axis = 1 # SparseArray currently 1-D, so only axis = 0 is valid.
711+
msg = "axis\(={axis}\) out of bounds".format(axis=axis)
712+
with tm.assertRaisesRegexp(ValueError, msg):
713+
SparseArray(data).cumsum(axis=axis)
706714

707715
def test_numpy_cumsum(self):
708-
data = np.arange(10).astype(float)
709-
out = np.cumsum(SparseArray(data))
710-
expected = SparseArray(data.cumsum())
711-
tm.assert_sp_array_equal(out, expected)
716+
non_null_data = np.array([1, 2, 3, 4, 5], dtype=float)
717+
non_null_expected = SparseArray(non_null_data.cumsum())
712718

713-
# TODO: gh-12855 - return a SparseArray here
714-
data[5] = np.nan
715-
out = np.cumsum(SparseArray(data, fill_value=2))
716-
self.assertNotIsInstance(out, SparseArray)
717-
tm.assert_numpy_array_equal(out, data.cumsum())
719+
null_data = np.array([1, 2, np.nan, 4, 5], dtype=float)
720+
null_expected = SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))
718721

719-
out = np.cumsum(SparseArray(data, fill_value=np.nan))
720-
expected = SparseArray(np.array([
721-
0, 1, 3, 6, 10, np.nan, 16, 23, 31, 40]))
722-
tm.assert_sp_array_equal(out, expected)
722+
for data, expected in [
723+
(null_data, null_expected),
724+
(non_null_data, non_null_expected)
725+
]:
726+
out = np.cumsum(SparseArray(data))
727+
tm.assert_sp_array_equal(out, expected)
723728

724-
msg = "the 'dtype' parameter is not supported"
725-
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
726-
SparseArray(data), dtype=np.int64)
729+
out = np.cumsum(SparseArray(data, fill_value=np.nan))
730+
tm.assert_sp_array_equal(out, expected)
727731

728-
msg = "the 'out' parameter is not supported"
729-
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
730-
SparseArray(data), out=out)
732+
out = np.cumsum(SparseArray(data, fill_value=2))
733+
tm.assert_sp_array_equal(out, expected)
734+
735+
msg = "the 'dtype' parameter is not supported"
736+
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
737+
SparseArray(data), dtype=np.int64)
738+
739+
msg = "the 'out' parameter is not supported"
740+
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
741+
SparseArray(data), out=out)
731742

732743
def test_mean(self):
733744
data = np.arange(10).astype(float)

pandas/sparse/tests/test_series.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1331,21 +1331,22 @@ def test_cumsum(self):
13311331
expected = SparseSeries(self.bseries.to_dense().cumsum())
13321332
tm.assert_sp_series_equal(result, expected)
13331333

1334-
# TODO: gh-12855 - return a SparseSeries here
13351334
result = self.zbseries.cumsum()
13361335
expected = self.zbseries.to_dense().cumsum()
1337-
self.assertNotIsInstance(result, SparseSeries)
13381336
tm.assert_series_equal(result, expected)
13391337

1338+
axis = 1 # Series is 1-D, so only axis = 0 is valid.
1339+
msg = "No axis named {axis}".format(axis=axis)
1340+
with tm.assertRaisesRegexp(ValueError, msg):
1341+
self.bseries.cumsum(axis=axis)
1342+
13401343
def test_numpy_cumsum(self):
13411344
result = np.cumsum(self.bseries)
13421345
expected = SparseSeries(self.bseries.to_dense().cumsum())
13431346
tm.assert_sp_series_equal(result, expected)
13441347

1345-
# TODO: gh-12855 - return a SparseSeries here
13461348
result = np.cumsum(self.zbseries)
13471349
expected = self.zbseries.to_dense().cumsum()
1348-
self.assertNotIsInstance(result, SparseSeries)
13491350
tm.assert_series_equal(result, expected)
13501351

13511352
msg = "the 'dtype' parameter is not supported"

0 commit comments

Comments
 (0)