Skip to content

ENH/BUG: Sparse now supports comparison op #12971

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ These changes conform sparse handling to return the correct types and work to ma
- Bug in ``SparseArray`` addition ignores ``fill_value`` of right hand side (:issue:`12910`)
- Bug in ``SparseArray`` mod raises ``AttributeError`` (:issue:`12910`)
- Bug in ``SparseArray`` pow calculates ``1 ** np.nan`` as ``np.nan`` which must be 1 (:issue:`12910`)
- Bug in ``SparseArray`` comparison may return an incorrect result or raise ``ValueError`` (:issue:`12971`)
- Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`)
- Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`)
- Bug in ``SparseSeries`` and ``SparseArray`` may have different ``dtype`` from its dense values (:issue:`12908`)
Expand Down
36 changes: 19 additions & 17 deletions pandas/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,8 @@ def wrapper(self, other):
elif lib.isscalar(other):
new_fill_value = op(np.float64(self.fill_value), np.float64(other))

return SparseArray(op(self.sp_values, other),
sparse_index=self.sp_index,
fill_value=new_fill_value)
return _wrap_result(name, op(self.sp_values, other),
self.sp_index, new_fill_value)
else: # pragma: no cover
raise TypeError('operation with %s not supported' % type(other))

Expand All @@ -59,30 +58,32 @@ def wrapper(self, other):


def _sparse_array_op(left, right, op, name):
sparse_op = lambda a, b: _sparse_op(a, b, name)

if left.sp_index.equals(right.sp_index):
result = op(left.sp_values, right.sp_values)
result_index = left.sp_index
else:
result, result_index = sparse_op(left, right)

sparse_op = getattr(splib, 'sparse_%s' % name)
result, result_index = sparse_op(left.sp_values, left.sp_index,
left.fill_value, right.sp_values,
right.sp_index, right.fill_value)
try:
fill_value = op(left.fill_value, right.fill_value)
except:
fill_value = nan

return SparseArray(result, sparse_index=result_index,
fill_value=fill_value)
return _wrap_result(name, result, result_index, fill_value)


def _sparse_op(this, other, name):
sparse_op = getattr(splib, 'sparse_%s' % name)
result, result_index = sparse_op(this.sp_values, this.sp_index,
this.fill_value, other.sp_values,
other.sp_index, other.fill_value)

return result, result_index
def _wrap_result(name, data, sparse_index, fill_value):
    """
    Wrap the result of a sparse operation in a ``SparseArray``.

    Parameters
    ----------
    name : str
        Operator name without underscores, e.g. ``'add'`` or ``'eq'``.
    data : array-like
        Values produced by the operation.
    sparse_index : object
        Index passed through as ``SparseArray(..., sparse_index=...)``.
    fill_value : scalar
        Fill value of the result.

    Returns
    -------
    SparseArray
        Built with ``dtype=bool`` when ``name`` is a comparison operator;
        otherwise built without an explicit dtype.
    """
    if name in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'):
        # ToDo: We can remove this condition when removing
        # SparseArray's dtype default when closing GH 667
        #
        # The builtin ``bool`` is used instead of the ``np.bool`` alias,
        # which was deprecated in NumPy 1.20 and removed in NumPy 1.24.
        return SparseArray(data, sparse_index=sparse_index,
                           fill_value=fill_value,
                           dtype=bool)

    return SparseArray(data, sparse_index=sparse_index,
                       fill_value=fill_value)


class SparseArray(PandasObject, np.ndarray):
Expand Down Expand Up @@ -594,4 +595,5 @@ def _make_index(length, indices, kind):


# Call ``ops.add_special_arithmetic_methods`` for SparseArray.  The same
# ``_arith_method`` factory is passed for both ``arith_method`` and
# ``comp_method``, so comparison operators go through the same wrapper as
# arithmetic ones; ``use_numexpr`` is switched off.
ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method,
comp_method=_arith_method,
use_numexpr=False)
96 changes: 96 additions & 0 deletions pandas/sparse/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,19 @@ def test_constructor_bool(self):
self.assertEqual(dense.dtype, bool)
tm.assert_numpy_array_equal(dense, data)

def test_constructor_bool_fill_value(self):
    """Boolean input yields a bool dtype and the expected ``fill_value``."""
    # The builtin ``bool`` is used throughout instead of the ``np.bool``
    # alias, which was deprecated in NumPy 1.20 and removed in NumPy 1.24.

    # dtype left for the constructor to decide: fill_value must be falsy
    arr = SparseArray([True, False, True], dtype=None)
    self.assertEqual(arr.dtype, bool)
    self.assertFalse(arr.fill_value)

    # explicit bool dtype, no fill_value given: fill_value must be falsy
    arr = SparseArray([True, False, True], dtype=bool)
    self.assertEqual(arr.dtype, bool)
    self.assertFalse(arr.fill_value)

    # explicit fill_value=True must be preserved
    arr = SparseArray([True, False, True], dtype=bool, fill_value=True)
    self.assertEqual(arr.dtype, bool)
    self.assertTrue(arr.fill_value)

def test_constructor_float32(self):
# GH 10648
data = np.array([1., np.nan, 3], dtype=np.float32)
Expand Down Expand Up @@ -522,6 +535,31 @@ def _check_numeric_ops(self, a, b, a_dense, b_dense):
tm.assert_numpy_array_equal((a ** b).to_dense(), a_dense ** b_dense)
tm.assert_numpy_array_equal((b ** a).to_dense(), b_dense ** a_dense)

def _check_comparison_ops(self, a, b, a_dense, b_dense):
    """
    Check every comparison operator of ``a`` against ``b``.

    For each of ``==``, ``!=``, ``>=``, ``<=``, ``>`` and ``<`` the result
    must be a ``SparseArray`` with bool dtype and a ``bool`` fill value, and
    its dense form must equal the same comparison applied to the dense
    operands ``a_dense`` and ``b_dense``.
    """

    def _check(res, expected):
        tm.assertIsInstance(res, SparseArray)
        # The builtin ``bool`` is used instead of the ``np.bool`` alias,
        # which was deprecated in NumPy 1.20 and removed in NumPy 1.24.
        self.assertEqual(res.dtype, bool)
        self.assertIsInstance(res.fill_value, bool)
        tm.assert_numpy_array_equal(res.to_dense(), expected)

    # One explicit line per operator so a failure's traceback names it.
    _check(a == b, a_dense == b_dense)
    _check(a != b, a_dense != b_dense)
    _check(a >= b, a_dense >= b_dense)
    _check(a <= b, a_dense <= b_dense)
    _check(a > b, a_dense > b_dense)
    _check(a < b, a_dense < b_dense)

def test_float_scalar(self):
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

Expand All @@ -541,6 +579,25 @@ def test_float_scalar(self):
self._check_numeric_ops(a, 0, values, 0)
self._check_numeric_ops(a, 3, values, 3)

def test_float_scalar_comparison(self):
    """Compare float sparse arrays with scalars for several fill values."""
    values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])

    # Extra constructor keywords: default fill value, then 0, then 2.
    fill_options = ({}, {'fill_value': 0}, {'fill_value': 2})
    scalars = (1, 0, 3)

    for kind in ['integer', 'block']:
        for extra in fill_options:
            a = SparseArray(values, kind=kind, **extra)
            for scalar in scalars:
                self._check_comparison_ops(a, scalar, values, scalar)

def test_float_same_index(self):
# when sp_index are the same
for kind in ['integer', 'block']:
Expand All @@ -558,6 +615,23 @@ def test_float_same_index(self):
b = SparseArray(rvalues, kind=kind, fill_value=0)
self._check_numeric_ops(a, b, values, rvalues)

def test_float_same_index_comparison(self):
    """Compare two sparse arrays whose ``sp_index`` are the same."""
    for kind in ['integer', 'block']:
        # NaN-filled operands using the default fill value
        lhs = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
        rhs = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])
        self._check_comparison_ops(SparseArray(lhs, kind=kind),
                                   SparseArray(rhs, kind=kind),
                                   lhs, rhs)

        # zero-filled operands with an explicit fill_value of 0
        lhs = np.array([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.])
        rhs = np.array([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.])
        self._check_comparison_ops(
            SparseArray(lhs, kind=kind, fill_value=0),
            SparseArray(rhs, kind=kind, fill_value=0),
            lhs, rhs)

def test_float_array(self):
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
Expand Down Expand Up @@ -601,6 +675,28 @@ def test_float_array_different_kind(self):
b = SparseArray(rvalues, kind='block', fill_value=2)
self._check_numeric_ops(a, b, values, rvalues)

def test_float_array_comparison(self):
    """Compare two float sparse arrays across fill-value combinations."""
    values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
    rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])

    # (left keywords, right keywords) for the cases after the default one
    fill_pairs = (
        ({'fill_value': 0}, {}),
        ({'fill_value': 0}, {'fill_value': 0}),
        ({'fill_value': 1}, {'fill_value': 2}),
    )

    for kind in ['integer', 'block']:
        # default fill values, plus a comparison against ``b * 0``
        a = SparseArray(values, kind=kind)
        b = SparseArray(rvalues, kind=kind)
        self._check_comparison_ops(a, b, values, rvalues)
        self._check_comparison_ops(a, b * 0, values, rvalues * 0)

        for left_kw, right_kw in fill_pairs:
            a = SparseArray(values, kind=kind, **left_kw)
            b = SparseArray(rvalues, kind=kind, **right_kw)
            self._check_comparison_ops(a, b, values, rvalues)


if __name__ == '__main__':
import nose
Expand Down
47 changes: 41 additions & 6 deletions pandas/src/sparse.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,12 @@ cdef inline float64_t __lt(float64_t a, float64_t b):
cdef inline float64_t __gt(float64_t a, float64_t b):
return a > b

cdef inline float64_t __le(float64_t a, float64_t b):
    # Result of ``a <= b`` converted to float64_t.
    return <float64_t>(a <= b)

cdef inline float64_t __ge(float64_t a, float64_t b):
    # Result of ``a >= b`` converted to float64_t.
    return <float64_t>(a >= b)

cdef inline float64_t __mod(float64_t a, float64_t b):
if b == 0:
return NaN
Expand Down Expand Up @@ -1040,33 +1046,62 @@ sparse_rtruediv = sparse_rdiv
cpdef sparse_floordiv(ndarray x, SparseIndex xindex, float64_t xfill,
ndarray y, SparseIndex yindex, float64_t yfill):
return sparse_combine(x, xindex, xfill,
y, yindex, yfill, __floordiv)
y, yindex, yfill, __floordiv)

cpdef sparse_rfloordiv(ndarray x, SparseIndex xindex, float64_t xfill,
ndarray y, SparseIndex yindex, float64_t yfill):
return sparse_combine(x, xindex, xfill,
y, yindex, yfill, __rfloordiv)
y, yindex, yfill, __rfloordiv)

cpdef sparse_mod(ndarray x, SparseIndex xindex, float64_t xfill,
ndarray y, SparseIndex yindex, float64_t yfill):
return sparse_combine(x, xindex, xfill,
y, yindex, yfill, __mod)
y, yindex, yfill, __mod)

cpdef sparse_rmod(ndarray x, SparseIndex xindex, float64_t xfill,
ndarray y, SparseIndex yindex, float64_t yfill):
return sparse_combine(x, xindex, xfill,
y, yindex, yfill, __rmod)
y, yindex, yfill, __rmod)

cpdef sparse_pow(ndarray x, SparseIndex xindex, float64_t xfill,
ndarray y, SparseIndex yindex, float64_t yfill):
return sparse_combine(x, xindex, xfill,
y, yindex, yfill, __pow)
y, yindex, yfill, __pow)

cpdef sparse_rpow(ndarray x, SparseIndex xindex, float64_t xfill,
ndarray y, SparseIndex yindex, float64_t yfill):
return sparse_combine(x, xindex, xfill,
y, yindex, yfill, __rpow)
y, yindex, yfill, __rpow)

cpdef sparse_eq(ndarray x, SparseIndex xindex, float64_t xfill,
                ndarray y, SparseIndex yindex, float64_t yfill):
    """Pass both sparse operands to ``sparse_combine`` with ``__eq``."""
    return sparse_combine(x, xindex, xfill, y, yindex, yfill, __eq)

cpdef sparse_ne(ndarray x, SparseIndex xindex, float64_t xfill,
                ndarray y, SparseIndex yindex, float64_t yfill):
    """Pass both sparse operands to ``sparse_combine`` with ``__ne``."""
    return sparse_combine(x, xindex, xfill, y, yindex, yfill, __ne)

cpdef sparse_lt(ndarray x, SparseIndex xindex, float64_t xfill,
                ndarray y, SparseIndex yindex, float64_t yfill):
    """Pass both sparse operands to ``sparse_combine`` with ``__lt``."""
    return sparse_combine(x, xindex, xfill, y, yindex, yfill, __lt)

cpdef sparse_gt(ndarray x, SparseIndex xindex, float64_t xfill,
                ndarray y, SparseIndex yindex, float64_t yfill):
    """Pass both sparse operands to ``sparse_combine`` with ``__gt``."""
    return sparse_combine(x, xindex, xfill, y, yindex, yfill, __gt)

cpdef sparse_le(ndarray x, SparseIndex xindex, float64_t xfill,
                ndarray y, SparseIndex yindex, float64_t yfill):
    """Pass both sparse operands to ``sparse_combine`` with ``__le``."""
    return sparse_combine(x, xindex, xfill, y, yindex, yfill, __le)

cpdef sparse_ge(ndarray x, SparseIndex xindex, float64_t xfill,
                ndarray y, SparseIndex yindex, float64_t yfill):
    """Pass both sparse operands to ``sparse_combine`` with ``__ge``."""
    return sparse_combine(x, xindex, xfill, y, yindex, yfill, __ge)

#-------------------------------------------------------------------------------
# Indexing operations
Expand Down