diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index cc2269afa6e61..ccfdd5dc7372e 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -121,6 +121,7 @@ These changes conform sparse handling to return the correct types and work to ma - Bug in ``SparseArray`` addition ignores ``fill_value`` of right hand side (:issue:`12910`) - Bug in ``SparseArray`` mod raises ``AttributeError (:issue:`12910`) - Bug in ``SparseArray`` pow calculates ``1 ** np.nan`` as ``np.nan`` which must be 1 (:issue:`12910`) +- Bug in ``SparseArray`` comparison may output an incorrect result or raise ``ValueError`` (:issue:`12971`) - Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`) - Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`) - Bug in ``SparseSeries`` and ``SparseArray`` may have different ``dtype`` from its dense values (:issue:`12908`) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index ff199276c1401..b080f2eb45a90 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -46,9 +46,8 @@ def wrapper(self, other): elif lib.isscalar(other): new_fill_value = op(np.float64(self.fill_value), np.float64(other)) - return SparseArray(op(self.sp_values, other), - sparse_index=self.sp_index, - fill_value=new_fill_value) + return _wrap_result(name, op(self.sp_values, other), + self.sp_index, new_fill_value) else: # pragma: no cover raise TypeError('operation with %s not supported' % type(other)) @@ -59,30 +58,32 @@ def wrapper(self, other): def _sparse_array_op(left, right, op, name): - sparse_op = lambda a, b: _sparse_op(a, b, name) - if left.sp_index.equals(right.sp_index): result = op(left.sp_values, right.sp_values) result_index = left.sp_index else: - result, result_index = sparse_op(left, right) - + sparse_op = getattr(splib, 'sparse_%s' % name) + result, result_index = sparse_op(left.sp_values, left.sp_index, + left.fill_value, 
right.sp_values, + right.sp_index, right.fill_value) try: fill_value = op(left.fill_value, right.fill_value) except: fill_value = nan - - return SparseArray(result, sparse_index=result_index, - fill_value=fill_value) + return _wrap_result(name, result, result_index, fill_value) -def _sparse_op(this, other, name): - sparse_op = getattr(splib, 'sparse_%s' % name) - result, result_index = sparse_op(this.sp_values, this.sp_index, - this.fill_value, other.sp_values, - other.sp_index, other.fill_value) - - return result, result_index +def _wrap_result(name, data, sparse_index, fill_value): + """ wrap op result to have correct dtype """ + if name in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'): + # ToDo: We can remove this condition when removing + # SparseArray's dtype default when closing GH 667 + return SparseArray(data, sparse_index=sparse_index, + fill_value=fill_value, + dtype=np.bool) + else: + return SparseArray(data, sparse_index=sparse_index, + fill_value=fill_value) class SparseArray(PandasObject, np.ndarray): @@ -594,4 +595,5 @@ def _make_index(length, indices, kind): ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method, + comp_method=_arith_method, use_numexpr=False) diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 862b67cf74411..2a905597c7fa0 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -262,6 +262,19 @@ def test_constructor_bool(self): self.assertEqual(dense.dtype, bool) tm.assert_numpy_array_equal(dense, data) + def test_constructor_bool_fill_value(self): + arr = SparseArray([True, False, True], dtype=None) + self.assertEqual(arr.dtype, np.bool) + self.assertFalse(arr.fill_value) + + arr = SparseArray([True, False, True], dtype=np.bool) + self.assertEqual(arr.dtype, np.bool) + self.assertFalse(arr.fill_value) + + arr = SparseArray([True, False, True], dtype=np.bool, fill_value=True) + self.assertEqual(arr.dtype, np.bool) + self.assertTrue(arr.fill_value) + 
def test_constructor_float32(self): # GH 10648 data = np.array([1., np.nan, 3], dtype=np.float32) @@ -522,6 +535,31 @@ def _check_numeric_ops(self, a, b, a_dense, b_dense): tm.assert_numpy_array_equal((a ** b).to_dense(), a_dense ** b_dense) tm.assert_numpy_array_equal((b ** a).to_dense(), b_dense ** a_dense) + def _check_comparison_ops(self, a, b, a_dense, b_dense): + + def _check(res): + tm.assertIsInstance(res, SparseArray) + self.assertEqual(res.dtype, np.bool) + self.assertIsInstance(res.fill_value, bool) + + _check(a == b) + tm.assert_numpy_array_equal((a == b).to_dense(), a_dense == b_dense) + + _check(a != b) + tm.assert_numpy_array_equal((a != b).to_dense(), a_dense != b_dense) + + _check(a >= b) + tm.assert_numpy_array_equal((a >= b).to_dense(), a_dense >= b_dense) + + _check(a <= b) + tm.assert_numpy_array_equal((a <= b).to_dense(), a_dense <= b_dense) + + _check(a > b) + tm.assert_numpy_array_equal((a > b).to_dense(), a_dense > b_dense) + + _check(a < b) + tm.assert_numpy_array_equal((a < b).to_dense(), a_dense < b_dense) + def test_float_scalar(self): values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) @@ -541,6 +579,25 @@ def test_float_scalar(self): self._check_numeric_ops(a, 0, values, 0) self._check_numeric_ops(a, 3, values, 3) + def test_float_scalar_comparison(self): + values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + + for kind in ['integer', 'block']: + a = SparseArray(values, kind=kind) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + a = SparseArray(values, kind=kind, fill_value=0) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + a = SparseArray(values, kind=kind, fill_value=2) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + def 
test_float_same_index(self): # when sp_index are the same for kind in ['integer', 'block']: @@ -558,6 +615,23 @@ def test_float_same_index(self): b = SparseArray(rvalues, kind=kind, fill_value=0) self._check_numeric_ops(a, b, values, rvalues) + def test_float_same_index_comparison(self): + # when sp_index are the same + for kind in ['integer', 'block']: + values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) + + a = SparseArray(values, kind=kind) + b = SparseArray(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + values = np.array([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.]) + rvalues = np.array([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.]) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind, fill_value=0) + self._check_comparison_ops(a, b, values, rvalues) + def test_float_array(self): values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) @@ -601,6 +675,28 @@ def test_float_array_different_kind(self): b = SparseArray(rvalues, kind='block', fill_value=2) self._check_numeric_ops(a, b, values, rvalues) + def test_float_array_comparison(self): + values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + for kind in ['integer', 'block']: + a = SparseArray(values, kind=kind) + b = SparseArray(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + self._check_comparison_ops(a, b * 0, values, rvalues * 0) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + a = SparseArray(values, kind=kind, fill_value=0) + b = SparseArray(rvalues, kind=kind, fill_value=0) + self._check_comparison_ops(a, b, values, rvalues) + + a = SparseArray(values, kind=kind, fill_value=1) + b = 
SparseArray(rvalues, kind=kind, fill_value=2) + self._check_comparison_ops(a, b, values, rvalues) + if __name__ == '__main__': import nose diff --git a/pandas/src/sparse.pyx b/pandas/src/sparse.pyx index 5d523fcfc2778..cb25158e471c7 100644 --- a/pandas/src/sparse.pyx +++ b/pandas/src/sparse.pyx @@ -985,6 +985,12 @@ cdef inline float64_t __lt(float64_t a, float64_t b): cdef inline float64_t __gt(float64_t a, float64_t b): return a > b +cdef inline float64_t __le(float64_t a, float64_t b): + return a <= b + +cdef inline float64_t __ge(float64_t a, float64_t b): + return a >= b + cdef inline float64_t __mod(float64_t a, float64_t b): if b == 0: return NaN @@ -1040,33 +1046,62 @@ sparse_rtruediv = sparse_rdiv cpdef sparse_floordiv(ndarray x, SparseIndex xindex, float64_t xfill, ndarray y, SparseIndex yindex, float64_t yfill): return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __floordiv) + y, yindex, yfill, __floordiv) cpdef sparse_rfloordiv(ndarray x, SparseIndex xindex, float64_t xfill, ndarray y, SparseIndex yindex, float64_t yfill): return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __rfloordiv) + y, yindex, yfill, __rfloordiv) cpdef sparse_mod(ndarray x, SparseIndex xindex, float64_t xfill, ndarray y, SparseIndex yindex, float64_t yfill): return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __mod) + y, yindex, yfill, __mod) cpdef sparse_rmod(ndarray x, SparseIndex xindex, float64_t xfill, ndarray y, SparseIndex yindex, float64_t yfill): return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __rmod) + y, yindex, yfill, __rmod) cpdef sparse_pow(ndarray x, SparseIndex xindex, float64_t xfill, ndarray y, SparseIndex yindex, float64_t yfill): return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __pow) + y, yindex, yfill, __pow) cpdef sparse_rpow(ndarray x, SparseIndex xindex, float64_t xfill, ndarray y, SparseIndex yindex, float64_t yfill): return sparse_combine(x, xindex, xfill, - y, yindex, yfill, __rpow) + y, yindex, yfill, 
__rpow) + +cpdef sparse_eq(ndarray x, SparseIndex xindex, float64_t xfill, + ndarray y, SparseIndex yindex, float64_t yfill): + return sparse_combine(x, xindex, xfill, + y, yindex, yfill, __eq) +cpdef sparse_ne(ndarray x, SparseIndex xindex, float64_t xfill, + ndarray y, SparseIndex yindex, float64_t yfill): + return sparse_combine(x, xindex, xfill, + y, yindex, yfill, __ne) + +cpdef sparse_lt(ndarray x, SparseIndex xindex, float64_t xfill, + ndarray y, SparseIndex yindex, float64_t yfill): + return sparse_combine(x, xindex, xfill, + y, yindex, yfill, __lt) + +cpdef sparse_gt(ndarray x, SparseIndex xindex, float64_t xfill, + ndarray y, SparseIndex yindex, float64_t yfill): + return sparse_combine(x, xindex, xfill, + y, yindex, yfill, __gt) + +cpdef sparse_le(ndarray x, SparseIndex xindex, float64_t xfill, + ndarray y, SparseIndex yindex, float64_t yfill): + return sparse_combine(x, xindex, xfill, + y, yindex, yfill, __le) + +cpdef sparse_ge(ndarray x, SparseIndex xindex, float64_t xfill, + ndarray y, SparseIndex yindex, float64_t yfill): + return sparse_combine(x, xindex, xfill, + y, yindex, yfill, __ge) #------------------------------------------------------------------------------- # Indexing operations