Skip to content

BUG: Default to stat axis for SparseDataFrame when axis=None #13066

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ Performance Improvements

Bug Fixes
~~~~~~~~~
- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)



Expand Down
7 changes: 7 additions & 0 deletions pandas/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,9 @@ def transpose(self, *args, **kwargs):

@Appender(DataFrame.count.__doc__)
def count(self, axis=0, **kwds):
# An explicit axis=None is replaced with the frame's stat axis number
# so that apply() below always receives a concrete axis.
if axis is None:
axis = self._stat_axis_number

# Count each column/row individually by applying count() along the axis.
return self.apply(lambda x: x.count(), axis=axis)

def cumsum(self, axis=0, *args, **kwargs):
Expand All @@ -667,6 +670,10 @@ def cumsum(self, axis=0, *args, **kwargs):
y : SparseDataFrame
"""
nv.validate_cumsum(args, kwargs)

if axis is None:
axis = self._stat_axis_number

return self.apply(lambda x: x.cumsum(), axis=axis)

def apply(self, func, axis=0, broadcast=False, reduce=False):
Expand Down
36 changes: 29 additions & 7 deletions pandas/sparse/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import operator

import nose # noqa
from numpy import nan
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -768,12 +767,19 @@ def _check(frame, orig):
self._check_all(_check)

def test_count(self):
result = self.frame.count()
dense_result = self.frame.to_dense().count()

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also (maybe in another function) run through all the stat methods (you can just assert that they don't raise), e.g. just loop over all of the named stat functions?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where can I find all the statistics methods? I just CTRL+F for axis=0, and these were the only two, but I'm not sure how I would automate that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean? All of the methods in DataFrame are available.

Copy link
Member Author

@gfyoung gfyoung May 3, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, right, but nevertheless, how do I automate going through all of the stat functions in SparseDataFrame?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just put them in a list:

mean, count, var, etc.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not optimal, but I guess that should suffice.

result = self.frame.count()
tm.assert_series_equal(result, dense_result)

result = self.frame.count(axis=None)
tm.assert_series_equal(result, dense_result)

result = self.frame.count(axis=0)
tm.assert_series_equal(result, dense_result)

result = self.frame.count(1)
dense_result = self.frame.to_dense().count(1)
result = self.frame.count(axis=1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this a problem with np.cumsum?

sdf.cumsum() works just fine now IIRC

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

np.cumsum is just calling sdf.cumsum(axis=None). That's why the Exception is being thrown.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course, so that's why we need to test all of these.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. Done.

dense_result = self.frame.to_dense().count(axis=1)

# win32 don't check dtype
tm.assert_series_equal(result, dense_result, check_dtype=False)
Expand Down Expand Up @@ -862,12 +868,19 @@ def setUp(self):
self.frame = SparseDataFrame(self.data, index=self.dates)

def test_cumsum(self):
result = self.frame.cumsum()
expected = SparseDataFrame(self.frame.to_dense().cumsum())

result = self.frame.cumsum()
tm.assert_sp_frame_equal(result, expected)

result = self.frame.cumsum(axis=None)
tm.assert_sp_frame_equal(result, expected)

result = self.frame.cumsum(axis=0)
tm.assert_sp_frame_equal(result, expected)

def test_numpy_cumsum(self):
result = np.cumsum(self.frame, axis=0)
result = np.cumsum(self.frame)
expected = SparseDataFrame(self.frame.to_dense().cumsum())
tm.assert_sp_frame_equal(result, expected)

Expand All @@ -879,7 +892,16 @@ def test_numpy_cumsum(self):
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
self.frame, out=result)

def test_numpy_func_call(self):
# Smoke test: no exception should be raised even though
# numpy passes in 'axis=None' or 'axis=-1'.
funcs = ['sum', 'cumsum', 'var',
'mean', 'prod', 'cumprod',
'std', 'min', 'max']
for func in funcs:
# Only checks that the call completes; the result is not inspected.
getattr(np, func)(self.frame)

if __name__ == '__main__':
import nose # noqa
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
14 changes: 12 additions & 2 deletions pandas/sparse/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import operator

import nose # noqa
from numpy import nan
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -549,6 +548,7 @@ def check(a, b):
def test_binary_operators(self):

# skipping for now #####
import nose
raise nose.SkipTest("skipping sparse binary operators test")

def _check_inplace_op(iop, op):
Expand Down Expand Up @@ -1259,7 +1259,17 @@ def test_numpy_cumsum(self):
tm.assertRaisesRegexp(ValueError, msg, np.cumsum,
self.zbseries, out=result)

def test_numpy_func_call(self):
# Smoke test: no exception should be raised even though
# numpy passes in 'axis=None' or 'axis=-1'.
funcs = ['sum', 'cumsum', 'var', 'mean',
'prod', 'cumprod', 'std', 'argsort',
'argmin', 'argmax', 'min', 'max']
for func in funcs:
# Run each function against both series fixtures named below.
for series in ('bseries', 'zbseries'):
# Only checks that the call completes; the result is not inspected.
getattr(np, func)(getattr(self, series))

if __name__ == '__main__':
import nose # noqa
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)