Skip to content

Commit 471c4e7

Browse files
sinhrks authored and jreback committed
ENH: bool sparse now supports logical op
Author: sinhrks <[email protected]> Closes #14000 from sinhrks/sparse_bool and squashes the following commits: 6db3096 [sinhrks] ENH: bool sparse now supports logical op
1 parent 8b50d8c commit 471c4e7

File tree

6 files changed

+811
-414
lines changed

6 files changed

+811
-414
lines changed

doc/source/whatsnew/v0.19.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,7 @@ Note that the limitation is applied to ``fill_value`` which default is ``np.nan`
762762
ValueError: unable to coerce current fill_value nan to int64 dtype
763763

764764
- Subclassed ``SparseDataFrame`` and ``SparseSeries`` now preserve class types when slicing or transposing. (:issue:`13787`)
765+
- ``SparseArray`` with ``bool`` dtype now supports logical (bool) operators (:issue:`14000`)
765766
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`)
766767
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`)
767768
- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)

pandas/sparse/array.py

+20-5
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def _sparse_array_op(left, right, op, name, series=False):
9898
right = right.astype(np.float64)
9999

100100
dtype = _maybe_match_dtype(left, right)
101+
result_dtype = None
101102

102103
if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
103104
result = op(left.get_values(), right.get_values())
@@ -116,13 +117,26 @@ def _sparse_array_op(left, right, op, name, series=False):
116117
left, right = right, left
117118
name = name[1:]
118119

119-
opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
120-
sparse_op = getattr(splib, opname)
120+
if name in ('and', 'or') and dtype == 'bool':
121+
opname = 'sparse_{name}_uint8'.format(name=name, dtype=dtype)
122+
# to make template simple, cast here
123+
left_sp_values = left.sp_values.view(np.uint8)
124+
right_sp_values = right.sp_values.view(np.uint8)
125+
result_dtype = np.bool
126+
else:
127+
opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
128+
left_sp_values = left.sp_values
129+
right_sp_values = right.sp_values
121130

122-
result, index, fill = sparse_op(left.sp_values, left.sp_index,
123-
left.fill_value, right.sp_values,
131+
sparse_op = getattr(splib, opname)
132+
result, index, fill = sparse_op(left_sp_values, left.sp_index,
133+
left.fill_value, right_sp_values,
124134
right.sp_index, right.fill_value)
125-
return _wrap_result(name, result, index, fill, dtype=result.dtype)
135+
136+
if result_dtype is None:
137+
result_dtype = result.dtype
138+
139+
return _wrap_result(name, result, index, fill, dtype=result_dtype)
126140

127141

128142
def _wrap_result(name, data, sparse_index, fill_value, dtype=None):
@@ -750,4 +764,5 @@ def _make_index(length, indices, kind):
750764

751765
ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method,
752766
comp_method=_arith_method,
767+
bool_method=_arith_method,
753768
use_numexpr=False)

pandas/sparse/tests/test_arithmetics.py

+44
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,20 @@ def _check_comparison_ops(self, a, b, a_dense, b_dense):
108108
self._check_bool_result(a < b_dense)
109109
self._assert((a < b_dense).to_dense(), a_dense < b_dense)
110110

111+
def _check_logical_ops(self, a, b, a_dense, b_dense):
112+
# sparse & sparse
113+
self._check_bool_result(a & b)
114+
self._assert((a & b).to_dense(), a_dense & b_dense)
115+
116+
self._check_bool_result(a | b)
117+
self._assert((a | b).to_dense(), a_dense | b_dense)
118+
# sparse & dense
119+
self._check_bool_result(a & b_dense)
120+
self._assert((a & b_dense).to_dense(), a_dense & b_dense)
121+
122+
self._check_bool_result(a | b_dense)
123+
self._assert((a | b_dense).to_dense(), a_dense | b_dense)
124+
111125
def test_float_scalar(self):
112126
values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan])
113127

@@ -305,6 +319,36 @@ def test_int_array_comparison(self):
305319
b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=2)
306320
self._check_comparison_ops(a, b, values, rvalues)
307321

322+
def test_bool_same_index(self):
323+
# GH 14000
324+
# when sp_index are the same
325+
for kind in ['integer', 'block']:
326+
values = self._base([True, False, True, True], dtype=np.bool)
327+
rvalues = self._base([True, False, True, True], dtype=np.bool)
328+
329+
for fill_value in [True, False, np.nan]:
330+
a = self._klass(values, kind=kind, dtype=np.bool,
331+
fill_value=fill_value)
332+
b = self._klass(rvalues, kind=kind, dtype=np.bool,
333+
fill_value=fill_value)
334+
self._check_logical_ops(a, b, values, rvalues)
335+
336+
def test_bool_array_logical(self):
337+
# GH 14000
338+
# when sp_index are the same
339+
for kind in ['integer', 'block']:
340+
values = self._base([True, False, True, False, True, True],
341+
dtype=np.bool)
342+
rvalues = self._base([True, False, False, True, False, True],
343+
dtype=np.bool)
344+
345+
for fill_value in [True, False, np.nan]:
346+
a = self._klass(values, kind=kind, dtype=np.bool,
347+
fill_value=fill_value)
348+
b = self._klass(rvalues, kind=kind, dtype=np.bool,
349+
fill_value=fill_value)
350+
self._check_logical_ops(a, b, values, rvalues)
351+
308352

309353
class TestSparseSeriesArithmetic(TestSparseArrayArithmetics):
310354

pandas/src/sparse.pyx

+1
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,7 @@ cdef class BlockUnion(BlockMerge):
758758
include "sparse_op_helper.pxi"
759759

760760

761+
761762
#-------------------------------------------------------------------------------
762763
# Indexing operations
763764

0 commit comments

Comments
 (0)