diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index 41ed0bf16ebae..322e6ff7982ee 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -130,6 +130,28 @@ keeps an arrays of all of the locations where the data are not equal to the fill value. The ``block`` format tracks only the locations and sizes of blocks of data. +.. _sparse.calculation: + +Sparse Calculation +------------------ + +You can apply NumPy ufuncs to a ``SparseArray`` and get a ``SparseArray`` back. + +.. ipython:: python + + arr = pd.SparseArray([1., np.nan, np.nan, -2., np.nan]) + np.abs(arr) + + +Note that the ufunc is also applied to ``fill_value``. This is needed to get +the correct dense result. + +.. ipython:: python + + arr = pd.SparseArray([1., -1, -1, -2., -1], fill_value=-1) + np.abs(arr) + np.abs(arr).to_dense() + .. _sparse.scipysparse: Interaction with scipy.sparse diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 0c60aeeae333b..e8d52ed8fbe29 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -759,6 +759,8 @@ Bug Fixes - Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`) - Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`) - Bug in ``SparseDataFrame`` doesn't respect passed ``SparseArray`` or ``SparseSeries`` 's dtype and ``fill_value`` (:issue:`13866`) +- Bug in ``SparseArray`` and ``SparseSeries`` don't apply ufunc to ``fill_value`` (:issue:`13853`) +- Bug in ``SparseSeries.abs`` incorrectly keeps negative ``fill_value`` (:issue:`13853`) - Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`) - Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`) - Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified 
will now return the selected range (:issue:`8287`) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index a0dbb35bffe92..5e36bc514b419 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -212,6 +212,34 @@ def kind(self): elif isinstance(self.sp_index, IntIndex): return 'integer' + def __array_wrap__(self, out_arr, context=None): + """ + NumPy calls this method when ufunc is applied + + Parameters + ---------- + + out_arr : ndarray + ufunc result (note that ufunc is only applied to sp_values) + context : tuple of 3 elements (ufunc, signature, domain) + for example, the following is the context when np.sin is applied to + SparseArray, + + (<ufunc 'sin'>, (SparseArray,), 0) + + See http://docs.scipy.org/doc/numpy/user/basics.subclassing.html + """ + if isinstance(context, tuple) and len(context) == 3: + ufunc, args, domain = context + # to apply ufunc only to fill_value (to avoid recursive call) + args = [getattr(a, 'fill_value', a) for a in args] + fill_value = ufunc(self.fill_value, *args[1:]) + else: + fill_value = self.fill_value + + return self._simple_new(out_arr, sp_index=self.sp_index, + fill_value=fill_value) + def __array_finalize__(self, obj): """ Gets called after any ufunc or other array operations, necessary diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 985899e6c6b79..b6a1e1e48c5c4 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -706,7 +706,7 @@ def apply(self, func, axis=0, broadcast=False, reduce=False): new_series = {} for k, v in compat.iteritems(self): applied = func(v) - applied.fill_value = func(applied.fill_value) + applied.fill_value = func(v.fill_value) new_series[k] = applied return self._constructor( new_series, index=self.index, columns=self.columns, diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 6c4392dbf7cb4..e8f4feffb725f 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -307,13 +307,22 @@ def __unicode__(self): rep = '%s\n%s' % (series_rep, 
repr(self.sp_index)) return rep - def __array_wrap__(self, result): + def __array_wrap__(self, result, context=None): """ Gets called prior to a ufunc (and after) + + See SparseArray.__array_wrap__ for detail. """ + if isinstance(context, tuple) and len(context) == 3: + ufunc, args, domain = context + args = [getattr(a, 'fill_value', a) for a in args] + fill_value = ufunc(self.fill_value, *args[1:]) + else: + fill_value = self.fill_value + return self._constructor(result, index=self.index, sparse_index=self.sp_index, - fill_value=self.fill_value, + fill_value=fill_value, copy=False).__finalize__(self) def __array_finalize__(self, obj): @@ -434,10 +443,8 @@ def abs(self): ------- abs: type of caller """ - res_sp_values = np.abs(self.sp_values) - return self._constructor(res_sp_values, index=self.index, - sparse_index=self.sp_index, - fill_value=self.fill_value).__finalize__(self) + return self._constructor(np.abs(self.values), + index=self.index).__finalize__(self) def get(self, label, default=None): """ diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index dd2126d0f52d2..dcd5df3791fcb 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -829,6 +829,52 @@ def test_numpy_mean(self): tm.assertRaisesRegexp(ValueError, msg, np.mean, SparseArray(data), out=out) + def test_ufunc(self): + # GH 13853 make sure ufunc is applied to fill_value + sparse = SparseArray([1, np.nan, 2, np.nan, -2]) + result = SparseArray([1, np.nan, 2, np.nan, 2]) + tm.assert_sp_array_equal(abs(sparse), result) + tm.assert_sp_array_equal(np.abs(sparse), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=1) + result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index, + fill_value=1) + tm.assert_sp_array_equal(abs(sparse), result) + tm.assert_sp_array_equal(np.abs(sparse), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=-1) + result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index, + fill_value=1) + 
tm.assert_sp_array_equal(abs(sparse), result) + tm.assert_sp_array_equal(np.abs(sparse), result) + + sparse = SparseArray([1, np.nan, 2, np.nan, -2]) + result = SparseArray(np.sin([1, np.nan, 2, np.nan, -2])) + tm.assert_sp_array_equal(np.sin(sparse), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=1) + result = SparseArray(np.sin([1, -1, 2, -2]), fill_value=np.sin(1)) + tm.assert_sp_array_equal(np.sin(sparse), result) + + sparse = SparseArray([1, -1, 0, -2], fill_value=0) + result = SparseArray(np.sin([1, -1, 0, -2]), fill_value=np.sin(0)) + tm.assert_sp_array_equal(np.sin(sparse), result) + + def test_ufunc_args(self): + # GH 13853 make sure ufunc is applied to fill_value, including its arg + sparse = SparseArray([1, np.nan, 2, np.nan, -2]) + result = SparseArray([2, np.nan, 3, np.nan, -1]) + tm.assert_sp_array_equal(np.add(sparse, 1), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=1) + result = SparseArray([2, 0, 3, -1], fill_value=2) + tm.assert_sp_array_equal(np.add(sparse, 1), result) + + sparse = SparseArray([1, -1, 0, -2], fill_value=0) + result = SparseArray([2, 0, 1, -1], fill_value=1) + tm.assert_sp_array_equal(np.add(sparse, 1), result) + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index f9ac7d9d34072..c5480973b46bc 100644 --- a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -589,6 +589,21 @@ def test_abs(self): tm.assert_sp_series_equal(result, expected) self.assertEqual(result.name, 'x') + s = SparseSeries([1, -2, 2, -3], fill_value=-2, name='x') + expected = SparseSeries([1, 2, 3], sparse_index=s.sp_index, + fill_value=2, name='x') + result = s.abs() + tm.assert_sp_series_equal(result, expected) + self.assertEqual(result.name, 'x') + + result = abs(s) + tm.assert_sp_series_equal(result, expected) + self.assertEqual(result.name, 'x') + + result 
= np.abs(s) + tm.assert_sp_series_equal(result, expected) + self.assertEqual(result.name, 'x') + def test_reindex(self): def _compare_with_series(sps, new_index): spsre = sps.reindex(new_index) @@ -1288,6 +1303,7 @@ def test_numpy_func_call(self): for series in ('bseries', 'zbseries'): getattr(np, func)(getattr(self, series)) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],