Skip to content

Commit 5f47608

Browse files
sinhrks and jreback
authored and committed
BUG: ufunc is not applied to sparse.fill_value
Author: sinhrks <[email protected]>
Closes #13853 from sinhrks/sparse_ufunc and squashes the following commits:
a14f573 [sinhrks] BUG: ufunc is not applied to sparse.fill_value
1 parent e2cb799 commit 5f47608

File tree

7 files changed

+128
-7
lines changed

7 files changed

+128
-7
lines changed

doc/source/sparse.rst

+22
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,28 @@ keeps an arrays of all of the locations where the data are not equal to the
130130
fill value. The ``block`` format tracks only the locations and sizes of blocks
131131
of data.
132132

133+
.. _sparse.calculation:
134+
135+
Sparse Calculation
136+
------------------
137+
138+
You can apply NumPy *ufuncs* to ``SparseArray`` and get a ``SparseArray`` as a result.
139+
140+
.. ipython:: python
141+
142+
arr = pd.SparseArray([1., np.nan, np.nan, -2., np.nan])
143+
np.abs(arr)
144+
145+
146+
The *ufunc* is also applied to ``fill_value``. This is needed to get
147+
the correct dense result.
148+
149+
.. ipython:: python
150+
151+
arr = pd.SparseArray([1., -1, -1, -2., -1], fill_value=-1)
152+
np.abs(arr)
153+
np.abs(arr).to_dense()
154+
133155
.. _sparse.scipysparse:
134156

135157
Interaction with scipy.sparse

doc/source/whatsnew/v0.19.0.txt

+2
Original file line number | Diff line number | Diff line change
@@ -759,6 +759,8 @@ Bug Fixes
759759
- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)
760760
- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`)
761761
- Bug in ``SparseDataFrame`` where the dtype and ``fill_value`` of a passed ``SparseArray`` or ``SparseSeries`` were not respected (:issue:`13866`)
762+
- Bug in ``SparseArray`` and ``SparseSeries`` where a ufunc was not applied to ``fill_value`` (:issue:`13853`)
763+
- Bug in ``SparseSeries.abs`` where a negative ``fill_value`` was incorrectly kept (:issue:`13853`)
762764
- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`)
763765
- Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`)
764766
- Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`)

pandas/sparse/array.py

+28
Original file line number | Diff line number | Diff line change
@@ -212,6 +212,34 @@ def kind(self):
212212
elif isinstance(self.sp_index, IntIndex):
213213
return 'integer'
214214

215+
def __array_wrap__(self, out_arr, context=None):
216+
"""
217+
NumPy calls this method when ufunc is applied
218+
219+
Parameters
220+
----------
221+
222+
out_arr : ndarray
223+
ufunc result (note that ufunc is only applied to sp_values)
224+
context : tuple of 3 elements (ufunc, signature, domain)
225+
for example, following is a context when np.sin is applied to
226+
SparseArray,
227+
228+
(<ufunc 'sin'>, (SparseArray,), 0)
229+
230+
See http://docs.scipy.org/doc/numpy/user/basics.subclassing.html
231+
"""
232+
if isinstance(context, tuple) and len(context) == 3:
233+
ufunc, args, domain = context
234+
# to apply ufunc only to fill_value (to avoid recursive call)
235+
args = [getattr(a, 'fill_value', a) for a in args]
236+
fill_value = ufunc(self.fill_value, *args[1:])
237+
else:
238+
fill_value = self.fill_value
239+
240+
return self._simple_new(out_arr, sp_index=self.sp_index,
241+
fill_value=fill_value)
242+
215243
def __array_finalize__(self, obj):
216244
"""
217245
Gets called after any ufunc or other array operations, necessary

pandas/sparse/frame.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -706,7 +706,7 @@ def apply(self, func, axis=0, broadcast=False, reduce=False):
706706
new_series = {}
707707
for k, v in compat.iteritems(self):
708708
applied = func(v)
709-
applied.fill_value = func(applied.fill_value)
709+
applied.fill_value = func(v.fill_value)
710710
new_series[k] = applied
711711
return self._constructor(
712712
new_series, index=self.index, columns=self.columns,

pandas/sparse/series.py

+13-6
Original file line number | Diff line number | Diff line change
@@ -307,13 +307,22 @@ def __unicode__(self):
307307
rep = '%s\n%s' % (series_rep, repr(self.sp_index))
308308
return rep
309309

310-
def __array_wrap__(self, result):
310+
def __array_wrap__(self, result, context=None):
311311
"""
312312
Gets called prior to a ufunc (and after)
313+
314+
See SparseArray.__array_wrap__ for detail.
313315
"""
316+
if isinstance(context, tuple) and len(context) == 3:
317+
ufunc, args, domain = context
318+
args = [getattr(a, 'fill_value', a) for a in args]
319+
fill_value = ufunc(self.fill_value, *args[1:])
320+
else:
321+
fill_value = self.fill_value
322+
314323
return self._constructor(result, index=self.index,
315324
sparse_index=self.sp_index,
316-
fill_value=self.fill_value,
325+
fill_value=fill_value,
317326
copy=False).__finalize__(self)
318327

319328
def __array_finalize__(self, obj):
@@ -434,10 +443,8 @@ def abs(self):
434443
-------
435444
abs: type of caller
436445
"""
437-
res_sp_values = np.abs(self.sp_values)
438-
return self._constructor(res_sp_values, index=self.index,
439-
sparse_index=self.sp_index,
440-
fill_value=self.fill_value).__finalize__(self)
446+
return self._constructor(np.abs(self.values),
447+
index=self.index).__finalize__(self)
441448

442449
def get(self, label, default=None):
443450
"""

pandas/sparse/tests/test_array.py

+46
Original file line number | Diff line number | Diff line change
@@ -829,6 +829,52 @@ def test_numpy_mean(self):
829829
tm.assertRaisesRegexp(ValueError, msg, np.mean,
830830
SparseArray(data), out=out)
831831

832+
def test_ufunc(self):
833+
# GH 13853 make sure ufunc is applied to fill_value
834+
sparse = SparseArray([1, np.nan, 2, np.nan, -2])
835+
result = SparseArray([1, np.nan, 2, np.nan, 2])
836+
tm.assert_sp_array_equal(abs(sparse), result)
837+
tm.assert_sp_array_equal(np.abs(sparse), result)
838+
839+
sparse = SparseArray([1, -1, 2, -2], fill_value=1)
840+
result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index,
841+
fill_value=1)
842+
tm.assert_sp_array_equal(abs(sparse), result)
843+
tm.assert_sp_array_equal(np.abs(sparse), result)
844+
845+
sparse = SparseArray([1, -1, 2, -2], fill_value=-1)
846+
result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index,
847+
fill_value=1)
848+
tm.assert_sp_array_equal(abs(sparse), result)
849+
tm.assert_sp_array_equal(np.abs(sparse), result)
850+
851+
sparse = SparseArray([1, np.nan, 2, np.nan, -2])
852+
result = SparseArray(np.sin([1, np.nan, 2, np.nan, -2]))
853+
tm.assert_sp_array_equal(np.sin(sparse), result)
854+
855+
sparse = SparseArray([1, -1, 2, -2], fill_value=1)
856+
result = SparseArray(np.sin([1, -1, 2, -2]), fill_value=np.sin(1))
857+
tm.assert_sp_array_equal(np.sin(sparse), result)
858+
859+
sparse = SparseArray([1, -1, 0, -2], fill_value=0)
860+
result = SparseArray(np.sin([1, -1, 0, -2]), fill_value=np.sin(0))
861+
tm.assert_sp_array_equal(np.sin(sparse), result)
862+
863+
def test_ufunc_args(self):
864+
# GH 13853 make sure ufunc is applied to fill_value, including its arg
865+
sparse = SparseArray([1, np.nan, 2, np.nan, -2])
866+
result = SparseArray([2, np.nan, 3, np.nan, -1])
867+
tm.assert_sp_array_equal(np.add(sparse, 1), result)
868+
869+
sparse = SparseArray([1, -1, 2, -2], fill_value=1)
870+
result = SparseArray([2, 0, 3, -1], fill_value=2)
871+
tm.assert_sp_array_equal(np.add(sparse, 1), result)
872+
873+
sparse = SparseArray([1, -1, 0, -2], fill_value=0)
874+
result = SparseArray([2, 0, 1, -1], fill_value=1)
875+
tm.assert_sp_array_equal(np.add(sparse, 1), result)
876+
877+
832878
if __name__ == '__main__':
833879
import nose
834880
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

pandas/sparse/tests/test_series.py

+16
Original file line number | Diff line number | Diff line change
@@ -589,6 +589,21 @@ def test_abs(self):
589589
tm.assert_sp_series_equal(result, expected)
590590
self.assertEqual(result.name, 'x')
591591

592+
s = SparseSeries([1, -2, 2, -3], fill_value=-2, name='x')
593+
expected = SparseSeries([1, 2, 3], sparse_index=s.sp_index,
594+
fill_value=2, name='x')
595+
result = s.abs()
596+
tm.assert_sp_series_equal(result, expected)
597+
self.assertEqual(result.name, 'x')
598+
599+
result = abs(s)
600+
tm.assert_sp_series_equal(result, expected)
601+
self.assertEqual(result.name, 'x')
602+
603+
result = np.abs(s)
604+
tm.assert_sp_series_equal(result, expected)
605+
self.assertEqual(result.name, 'x')
606+
592607
def test_reindex(self):
593608
def _compare_with_series(sps, new_index):
594609
spsre = sps.reindex(new_index)
@@ -1288,6 +1303,7 @@ def test_numpy_func_call(self):
12881303
for series in ('bseries', 'zbseries'):
12891304
getattr(np, func)(getattr(self, series))
12901305

1306+
12911307
if __name__ == '__main__':
12921308
import nose
12931309
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)