Skip to content

Commit d57807c

Browse files
committed
ENH/BUG: Sparse now supports comparison op
1 parent a544e9e commit d57807c

File tree

4 files changed

+157
-23
lines changed

4 files changed

+157
-23
lines changed

doc/source/whatsnew/v0.18.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ These changes conform sparse handling to return the correct types and work to ma
121121
- Bug in ``SparseArray`` addition ignores ``fill_value`` of right hand side (:issue:`12910`)
122122
- Bug in ``SparseArray`` mod raises ``AttributeError`` (:issue:`12910`)
123123
- Bug in ``SparseArray`` pow calculates ``1 ** np.nan`` as ``np.nan`` which must be 1 (:issue:`12910`)
124+
- Bug in ``SparseArray`` comparison may output an incorrect result or raise ``ValueError`` (:issue:`12971`)
124125
- Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`)
125126
- Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`)
126127
- Bug in ``SparseSeries`` and ``SparseArray`` may have different ``dtype`` from its dense values (:issue:`12908`)

pandas/sparse/array.py

+19-17
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,8 @@ def wrapper(self, other):
4646
elif lib.isscalar(other):
4747
new_fill_value = op(np.float64(self.fill_value), np.float64(other))
4848

49-
return SparseArray(op(self.sp_values, other),
50-
sparse_index=self.sp_index,
51-
fill_value=new_fill_value)
49+
return _wrap_result(name, op(self.sp_values, other),
50+
self.sp_index, new_fill_value)
5251
else: # pragma: no cover
5352
raise TypeError('operation with %s not supported' % type(other))
5453

@@ -59,30 +58,32 @@ def wrapper(self, other):
5958

6059

6160
def _sparse_array_op(left, right, op, name):
62-
sparse_op = lambda a, b: _sparse_op(a, b, name)
63-
6461
if left.sp_index.equals(right.sp_index):
6562
result = op(left.sp_values, right.sp_values)
6663
result_index = left.sp_index
6764
else:
68-
result, result_index = sparse_op(left, right)
69-
65+
sparse_op = getattr(splib, 'sparse_%s' % name)
66+
result, result_index = sparse_op(left.sp_values, left.sp_index,
67+
left.fill_value, right.sp_values,
68+
right.sp_index, right.fill_value)
7069
try:
7170
fill_value = op(left.fill_value, right.fill_value)
7271
except:
7372
fill_value = nan
74-
75-
return SparseArray(result, sparse_index=result_index,
76-
fill_value=fill_value)
73+
return _wrap_result(name, result, result_index, fill_value)
7774

7875

79-
def _sparse_op(this, other, name):
80-
sparse_op = getattr(splib, 'sparse_%s' % name)
81-
result, result_index = sparse_op(this.sp_values, this.sp_index,
82-
this.fill_value, other.sp_values,
83-
other.sp_index, other.fill_value)
84-
85-
return result, result_index
76+
def _wrap_result(name, data, sparse_index, fill_value):
77+
""" wrap op result to have correct dtype """
78+
if name in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'):
79+
# ToDo: We can remove this condition when removing
80+
# SparseArray's dtype default when closing GH 667
81+
return SparseArray(data, sparse_index=sparse_index,
82+
fill_value=fill_value,
83+
dtype=np.bool)
84+
else:
85+
return SparseArray(data, sparse_index=sparse_index,
86+
fill_value=fill_value)
8687

8788

8889
class SparseArray(PandasObject, np.ndarray):
@@ -594,4 +595,5 @@ def _make_index(length, indices, kind):
594595

595596

596597
ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method,
598+
comp_method=_arith_method,
597599
use_numexpr=False)

pandas/sparse/tests/test_array.py

+96
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,19 @@ def test_constructor_bool(self):
262262
self.assertEqual(dense.dtype, bool)
263263
tm.assert_numpy_array_equal(dense, data)
264264

265+
def test_constructor_bool_fill_value(self):
266+
arr = SparseArray([True, False, True], dtype=None)
267+
self.assertEqual(arr.dtype, np.bool)
268+
self.assertFalse(arr.fill_value)
269+
270+
arr = SparseArray([True, False, True], dtype=np.bool)
271+
self.assertEqual(arr.dtype, np.bool)
272+
self.assertFalse(arr.fill_value)
273+
274+
arr = SparseArray([True, False, True], dtype=np.bool, fill_value=True)
275+
self.assertEqual(arr.dtype, np.bool)
276+
self.assertTrue(arr.fill_value)
277+
265278
def test_constructor_float32(self):
266279
# GH 10648
267280
data = np.array([1., np.nan, 3], dtype=np.float32)
@@ -522,6 +535,31 @@ def _check_numeric_ops(self, a, b, a_dense, b_dense):
522535
tm.assert_numpy_array_equal((a ** b).to_dense(), a_dense ** b_dense)
523536
tm.assert_numpy_array_equal((b ** a).to_dense(), b_dense ** a_dense)
524537

538+
def _check_comparison_ops(self, a, b, a_dense, b_dense):
539+
540+
def _check(res):
541+
tm.assertIsInstance(res, SparseArray)
542+
self.assertEqual(res.dtype, np.bool)
543+
self.assertIsInstance(res.fill_value, bool)
544+
545+
_check(a == b)
546+
tm.assert_numpy_array_equal((a == b).to_dense(), a_dense == b_dense)
547+
548+
_check(a != b)
549+
tm.assert_numpy_array_equal((a != b).to_dense(), a_dense != b_dense)
550+
551+
_check(a >= b)
552+
tm.assert_numpy_array_equal((a >= b).to_dense(), a_dense >= b_dense)
553+
554+
_check(a <= b)
555+
tm.assert_numpy_array_equal((a <= b).to_dense(), a_dense <= b_dense)
556+
557+
_check(a > b)
558+
tm.assert_numpy_array_equal((a > b).to_dense(), a_dense > b_dense)
559+
560+
_check(a < b)
561+
tm.assert_numpy_array_equal((a < b).to_dense(), a_dense < b_dense)
562+
525563
def test_float_scalar(self):
526564
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
527565

@@ -541,6 +579,25 @@ def test_float_scalar(self):
541579
self._check_numeric_ops(a, 0, values, 0)
542580
self._check_numeric_ops(a, 3, values, 3)
543581

582+
def test_float_scalar_comparison(self):
583+
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
584+
585+
for kind in ['integer', 'block']:
586+
a = SparseArray(values, kind=kind)
587+
self._check_comparison_ops(a, 1, values, 1)
588+
self._check_comparison_ops(a, 0, values, 0)
589+
self._check_comparison_ops(a, 3, values, 3)
590+
591+
a = SparseArray(values, kind=kind, fill_value=0)
592+
self._check_comparison_ops(a, 1, values, 1)
593+
self._check_comparison_ops(a, 0, values, 0)
594+
self._check_comparison_ops(a, 3, values, 3)
595+
596+
a = SparseArray(values, kind=kind, fill_value=2)
597+
self._check_comparison_ops(a, 1, values, 1)
598+
self._check_comparison_ops(a, 0, values, 0)
599+
self._check_comparison_ops(a, 3, values, 3)
600+
544601
def test_float_same_index(self):
545602
# when sp_index are the same
546603
for kind in ['integer', 'block']:
@@ -558,6 +615,23 @@ def test_float_same_index(self):
558615
b = SparseArray(rvalues, kind=kind, fill_value=0)
559616
self._check_numeric_ops(a, b, values, rvalues)
560617

618+
def test_float_same_index_comparison(self):
619+
# when sp_index are the same
620+
for kind in ['integer', 'block']:
621+
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
622+
rvalues = np.array([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan])
623+
624+
a = SparseArray(values, kind=kind)
625+
b = SparseArray(rvalues, kind=kind)
626+
self._check_comparison_ops(a, b, values, rvalues)
627+
628+
values = np.array([0., 1., 2., 6., 0., 0., 1., 2., 1., 0.])
629+
rvalues = np.array([0., 2., 3., 4., 0., 0., 1., 3., 2., 0.])
630+
631+
a = SparseArray(values, kind=kind, fill_value=0)
632+
b = SparseArray(rvalues, kind=kind, fill_value=0)
633+
self._check_comparison_ops(a, b, values, rvalues)
634+
561635
def test_float_array(self):
562636
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
563637
rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
@@ -601,6 +675,28 @@ def test_float_array_different_kind(self):
601675
b = SparseArray(rvalues, kind='block', fill_value=2)
602676
self._check_numeric_ops(a, b, values, rvalues)
603677

678+
def test_float_array_comparison(self):
679+
values = np.array([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
680+
rvalues = np.array([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan])
681+
682+
for kind in ['integer', 'block']:
683+
a = SparseArray(values, kind=kind)
684+
b = SparseArray(rvalues, kind=kind)
685+
self._check_comparison_ops(a, b, values, rvalues)
686+
self._check_comparison_ops(a, b * 0, values, rvalues * 0)
687+
688+
a = SparseArray(values, kind=kind, fill_value=0)
689+
b = SparseArray(rvalues, kind=kind)
690+
self._check_comparison_ops(a, b, values, rvalues)
691+
692+
a = SparseArray(values, kind=kind, fill_value=0)
693+
b = SparseArray(rvalues, kind=kind, fill_value=0)
694+
self._check_comparison_ops(a, b, values, rvalues)
695+
696+
a = SparseArray(values, kind=kind, fill_value=1)
697+
b = SparseArray(rvalues, kind=kind, fill_value=2)
698+
self._check_comparison_ops(a, b, values, rvalues)
699+
604700

605701
if __name__ == '__main__':
606702
import nose

pandas/src/sparse.pyx

+41-6
Original file line numberDiff line numberDiff line change
@@ -985,6 +985,12 @@ cdef inline float64_t __lt(float64_t a, float64_t b):
985985
cdef inline float64_t __gt(float64_t a, float64_t b):
986986
return a > b
987987

988+
cdef inline float64_t __le(float64_t a, float64_t b):
989+
return a <= b
990+
991+
cdef inline float64_t __ge(float64_t a, float64_t b):
992+
return a >= b
993+
988994
cdef inline float64_t __mod(float64_t a, float64_t b):
989995
if b == 0:
990996
return NaN
@@ -1040,33 +1046,62 @@ sparse_rtruediv = sparse_rdiv
10401046
cpdef sparse_floordiv(ndarray x, SparseIndex xindex, float64_t xfill,
10411047
ndarray y, SparseIndex yindex, float64_t yfill):
10421048
return sparse_combine(x, xindex, xfill,
1043-
y, yindex, yfill, __floordiv)
1049+
y, yindex, yfill, __floordiv)
10441050

10451051
cpdef sparse_rfloordiv(ndarray x, SparseIndex xindex, float64_t xfill,
10461052
ndarray y, SparseIndex yindex, float64_t yfill):
10471053
return sparse_combine(x, xindex, xfill,
1048-
y, yindex, yfill, __rfloordiv)
1054+
y, yindex, yfill, __rfloordiv)
10491055

10501056
cpdef sparse_mod(ndarray x, SparseIndex xindex, float64_t xfill,
10511057
ndarray y, SparseIndex yindex, float64_t yfill):
10521058
return sparse_combine(x, xindex, xfill,
1053-
y, yindex, yfill, __mod)
1059+
y, yindex, yfill, __mod)
10541060

10551061
cpdef sparse_rmod(ndarray x, SparseIndex xindex, float64_t xfill,
10561062
ndarray y, SparseIndex yindex, float64_t yfill):
10571063
return sparse_combine(x, xindex, xfill,
1058-
y, yindex, yfill, __rmod)
1064+
y, yindex, yfill, __rmod)
10591065

10601066
cpdef sparse_pow(ndarray x, SparseIndex xindex, float64_t xfill,
10611067
ndarray y, SparseIndex yindex, float64_t yfill):
10621068
return sparse_combine(x, xindex, xfill,
1063-
y, yindex, yfill, __pow)
1069+
y, yindex, yfill, __pow)
10641070

10651071
cpdef sparse_rpow(ndarray x, SparseIndex xindex, float64_t xfill,
10661072
ndarray y, SparseIndex yindex, float64_t yfill):
10671073
return sparse_combine(x, xindex, xfill,
1068-
y, yindex, yfill, __rpow)
1074+
y, yindex, yfill, __rpow)
1075+
1076+
cpdef sparse_eq(ndarray x, SparseIndex xindex, float64_t xfill,
1077+
ndarray y, SparseIndex yindex, float64_t yfill):
1078+
return sparse_combine(x, xindex, xfill,
1079+
y, yindex, yfill, __eq)
10691080

1081+
cpdef sparse_ne(ndarray x, SparseIndex xindex, float64_t xfill,
1082+
ndarray y, SparseIndex yindex, float64_t yfill):
1083+
return sparse_combine(x, xindex, xfill,
1084+
y, yindex, yfill, __ne)
1085+
1086+
cpdef sparse_lt(ndarray x, SparseIndex xindex, float64_t xfill,
1087+
ndarray y, SparseIndex yindex, float64_t yfill):
1088+
return sparse_combine(x, xindex, xfill,
1089+
y, yindex, yfill, __lt)
1090+
1091+
cpdef sparse_gt(ndarray x, SparseIndex xindex, float64_t xfill,
1092+
ndarray y, SparseIndex yindex, float64_t yfill):
1093+
return sparse_combine(x, xindex, xfill,
1094+
y, yindex, yfill, __gt)
1095+
1096+
cpdef sparse_le(ndarray x, SparseIndex xindex, float64_t xfill,
1097+
ndarray y, SparseIndex yindex, float64_t yfill):
1098+
return sparse_combine(x, xindex, xfill,
1099+
y, yindex, yfill, __le)
1100+
1101+
cpdef sparse_ge(ndarray x, SparseIndex xindex, float64_t xfill,
1102+
ndarray y, SparseIndex yindex, float64_t yfill):
1103+
return sparse_combine(x, xindex, xfill,
1104+
y, yindex, yfill, __ge)
10701105

10711106
#-------------------------------------------------------------------------------
10721107
# Indexing operations

0 commit comments

Comments
 (0)