Skip to content

BUG: ufunc is not applied to sparse.fill_value #13853

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions doc/source/sparse.rst
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,28 @@ keeps an arrays of all of the locations where the data are not equal to the
fill value. The ``block`` format tracks only the locations and sizes of blocks
of data.

.. _sparse.calculation:

Sparse Calculation
------------------

You can apply NumPy ufuncs to a ``SparseArray`` and get a ``SparseArray`` as the result.

.. ipython:: python

arr = pd.SparseArray([1., np.nan, np.nan, -2., np.nan])
np.abs(arr)


Note that the ufunc is also applied to ``fill_value``. This is needed to get
the correct dense result.

.. ipython:: python

arr = pd.SparseArray([1., -1, -1, -2., -1], fill_value=-1)
np.abs(arr)
np.abs(arr).to_dense()

.. _sparse.scipysparse:

Interaction with scipy.sparse
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,8 @@ Bug Fixes
- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)
- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`)
- Bug in ``SparseDataFrame`` doesn't respect passed ``SparseArray`` or ``SparseSeries`` 's dtype and ``fill_value`` (:issue:`13866`)
- Bug in ``SparseArray`` and ``SparseSeries`` where ufuncs were not applied to ``fill_value`` (:issue:`13853`)
- Bug in ``SparseSeries.abs`` where a negative ``fill_value`` was incorrectly kept (:issue:`13853`)
- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`)
- Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`)
- Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`)
Expand Down
28 changes: 28 additions & 0 deletions pandas/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,34 @@ def kind(self):
elif isinstance(self.sp_index, IntIndex):
return 'integer'

def __array_wrap__(self, out_arr, context=None):
    """
    NumPy calls this method when a ufunc is applied.

    The ufunc itself is only evaluated on ``sp_values``; this hook
    evaluates it once more on the scalar ``fill_value`` so that the
    result stays consistent with its dense representation.

    Parameters
    ----------
    out_arr : ndarray
        ufunc result (note that ufunc is only applied to sp_values)
    context : tuple of 3 elements (ufunc, signature, domain), optional
        for example, following is a context when np.sin is applied to
        SparseArray,

        (<ufunc 'sin'>, (SparseArray,), 0)

        When it is not such a 3-tuple, ``fill_value`` is carried over
        unchanged.

    Returns
    -------
    Whatever ``self._simple_new`` builds from ``out_arr``, the current
    ``sp_index`` and the recomputed ``fill_value``.

    See http://docs.scipy.org/doc/numpy/user/basics.subclassing.html
    """
    if isinstance(context, tuple) and len(context) == 3:
        ufunc, args, domain = context
        # Evaluate the ufunc on scalars only (to avoid a recursive call):
        # every operand exposing ``fill_value`` is replaced by it *in its
        # own position*, so ``np.subtract(10, sparse)`` yields
        # ``10 - fill_value`` rather than ``fill_value - fill_value``.
        # Only the first ``ufunc.nin`` entries are inputs; anything after
        # them is not passed on.
        scalar_inputs = [getattr(a, 'fill_value', a)
                         for a in args[:ufunc.nin]]
        fill_value = ufunc(*scalar_inputs)
    else:
        fill_value = self.fill_value

    return self._simple_new(out_arr, sp_index=self.sp_index,
                            fill_value=fill_value)

def __array_finalize__(self, obj):
"""
Gets called after any ufunc or other array operations, necessary
Expand Down
2 changes: 1 addition & 1 deletion pandas/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ def apply(self, func, axis=0, broadcast=False, reduce=False):
new_series = {}
for k, v in compat.iteritems(self):
applied = func(v)
applied.fill_value = func(applied.fill_value)
applied.fill_value = func(v.fill_value)
new_series[k] = applied
return self._constructor(
new_series, index=self.index, columns=self.columns,
Expand Down
19 changes: 13 additions & 6 deletions pandas/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,13 +307,22 @@ def __unicode__(self):
rep = '%s\n%s' % (series_rep, repr(self.sp_index))
return rep

def __array_wrap__(self, result):
def __array_wrap__(self, result, context=None):
"""
Gets called prior to a ufunc (and after)

See SparseArray.__array_wrap__ for detail.
"""
if isinstance(context, tuple) and len(context) == 3:
ufunc, args, domain = context
args = [getattr(a, 'fill_value', a) for a in args]
fill_value = ufunc(self.fill_value, *args[1:])
else:
fill_value = self.fill_value

return self._constructor(result, index=self.index,
sparse_index=self.sp_index,
fill_value=self.fill_value,
fill_value=fill_value,
copy=False).__finalize__(self)

def __array_finalize__(self, obj):
Expand Down Expand Up @@ -434,10 +443,8 @@ def abs(self):
-------
abs: type of caller
"""
res_sp_values = np.abs(self.sp_values)
return self._constructor(res_sp_values, index=self.index,
sparse_index=self.sp_index,
fill_value=self.fill_value).__finalize__(self)
return self._constructor(np.abs(self.values),
index=self.index).__finalize__(self)

def get(self, label, default=None):
"""
Expand Down
46 changes: 46 additions & 0 deletions pandas/sparse/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -829,6 +829,52 @@ def test_numpy_mean(self):
tm.assertRaisesRegexp(ValueError, msg, np.mean,
SparseArray(data), out=out)

def test_ufunc(self):
# GH 13853 make sure ufunc is applied to fill_value
sparse = SparseArray([1, np.nan, 2, np.nan, -2])
result = SparseArray([1, np.nan, 2, np.nan, 2])
tm.assert_sp_array_equal(abs(sparse), result)
tm.assert_sp_array_equal(np.abs(sparse), result)

sparse = SparseArray([1, -1, 2, -2], fill_value=1)
result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain why this result is correct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because assert_sp_array_equal compares the sparse internal representation, the expected value is built this way so that its internal representation matches. You can see that the result is correct from its dense representation.

# test case
sparse = pd.SparseArray([1, -1, 2, -2], fill_value=1)
abs(sparse).to_dense()
# array([ 1.,  1.,  2.,  2.])
# result
pd.SparseArray([1, 2, 2], sparse_index=sparse.sp_index, fill_value=1).to_dense()
# array([ 1.,  1.,  2.,  2.])

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, good to know. I wasn't 100% clear on how the sparse comparison worked. Thanks!

fill_value=1)
tm.assert_sp_array_equal(abs(sparse), result)
tm.assert_sp_array_equal(np.abs(sparse), result)

sparse = SparseArray([1, -1, 2, -2], fill_value=-1)
result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index,
fill_value=1)
tm.assert_sp_array_equal(abs(sparse), result)
tm.assert_sp_array_equal(np.abs(sparse), result)

sparse = SparseArray([1, np.nan, 2, np.nan, -2])
result = SparseArray(np.sin([1, np.nan, 2, np.nan, -2]))
tm.assert_sp_array_equal(np.sin(sparse), result)

sparse = SparseArray([1, -1, 2, -2], fill_value=1)
result = SparseArray(np.sin([1, -1, 2, -2]), fill_value=np.sin(1))
tm.assert_sp_array_equal(np.sin(sparse), result)

sparse = SparseArray([1, -1, 0, -2], fill_value=0)
result = SparseArray(np.sin([1, -1, 0, -2]), fill_value=np.sin(0))
tm.assert_sp_array_equal(np.sin(sparse), result)

def test_ufunc_args(self):
    # GH 13853: the ufunc, together with its extra argument, has to be
    # applied to fill_value as well as to the stored values
    cases = [
        # (input data, input kwargs, expected data, expected kwargs)
        ([1, np.nan, 2, np.nan, -2], {},
         [2, np.nan, 3, np.nan, -1], {}),
        ([1, -1, 2, -2], {'fill_value': 1},
         [2, 0, 3, -1], {'fill_value': 2}),
        ([1, -1, 0, -2], {'fill_value': 0},
         [2, 0, 1, -1], {'fill_value': 1}),
    ]
    for data, kwargs, exp_data, exp_kwargs in cases:
        sparse = SparseArray(data, **kwargs)
        result = SparseArray(exp_data, **exp_kwargs)
        tm.assert_sp_array_equal(np.add(sparse, 1), result)


if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down
16 changes: 16 additions & 0 deletions pandas/sparse/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,21 @@ def test_abs(self):
tm.assert_sp_series_equal(result, expected)
self.assertEqual(result.name, 'x')

s = SparseSeries([1, -2, 2, -3], fill_value=-2, name='x')
expected = SparseSeries([1, 2, 3], sparse_index=s.sp_index,
fill_value=2, name='x')
result = s.abs()
tm.assert_sp_series_equal(result, expected)
self.assertEqual(result.name, 'x')

result = abs(s)
tm.assert_sp_series_equal(result, expected)
self.assertEqual(result.name, 'x')

result = np.abs(s)
tm.assert_sp_series_equal(result, expected)
self.assertEqual(result.name, 'x')

def test_reindex(self):
def _compare_with_series(sps, new_index):
spsre = sps.reindex(new_index)
Expand Down Expand Up @@ -1288,6 +1303,7 @@ def test_numpy_func_call(self):
for series in ('bseries', 'zbseries'):
getattr(np, func)(getattr(self, series))


if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down