From 6db309637fb81e168be98b2d0094cf6951ee8bc7 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 14 Aug 2016 12:44:39 +0900 Subject: [PATCH] ENH: bool sparse now supports logical op --- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/sparse/array.py | 25 +- pandas/sparse/tests/test_arithmetics.py | 44 + pandas/src/sparse.pyx | 1 + pandas/src/sparse_op_helper.pxi | 1110 +++++++++++++++-------- pandas/src/sparse_op_helper.pxi.in | 44 +- 6 files changed, 811 insertions(+), 414 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 3600b8f52873b..dd7fd598bbc00 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -762,6 +762,7 @@ Note that the limitation is applied to ``fill_value`` which default is ``np.nan` ValueError: unable to coerce current fill_value nan to int64 dtype - Subclassed ``SparseDataFrame`` and ``SparseSeries`` now preserve class types when slicing or transposing. (:issue:`13787`) +- ``SparseArray`` with ``bool`` dtype now supports logical (bool) operators (:issue:`14000`) - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`) - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`) - Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index d14a8eadddc13..8d564d0abbf3f 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -98,6 +98,7 @@ def _sparse_array_op(left, right, op, name, series=False): right = right.astype(np.float64) dtype = _maybe_match_dtype(left, right) + result_dtype = None if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0: result = op(left.get_values(), right.get_values()) @@ -116,13 +117,26 @@ def _sparse_array_op(left, right, op, name, series=False): left, right = right, left name = name[1:] - opname = 
'sparse_{name}_{dtype}'.format(name=name, dtype=dtype) - sparse_op = getattr(splib, opname) + if name in ('and', 'or') and dtype == 'bool': + opname = 'sparse_{name}_uint8'.format(name=name, dtype=dtype) + # to make template simple, cast here + left_sp_values = left.sp_values.view(np.uint8) + right_sp_values = right.sp_values.view(np.uint8) + result_dtype = np.bool + else: + opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype) + left_sp_values = left.sp_values + right_sp_values = right.sp_values - result, index, fill = sparse_op(left.sp_values, left.sp_index, - left.fill_value, right.sp_values, + sparse_op = getattr(splib, opname) + result, index, fill = sparse_op(left_sp_values, left.sp_index, + left.fill_value, right_sp_values, right.sp_index, right.fill_value) - return _wrap_result(name, result, index, fill, dtype=result.dtype) + + if result_dtype is None: + result_dtype = result.dtype + + return _wrap_result(name, result, index, fill, dtype=result_dtype) def _wrap_result(name, data, sparse_index, fill_value, dtype=None): @@ -750,4 +764,5 @@ def _make_index(length, indices, kind): ops.add_special_arithmetic_methods(SparseArray, arith_method=_arith_method, comp_method=_arith_method, + bool_method=_arith_method, use_numexpr=False) diff --git a/pandas/sparse/tests/test_arithmetics.py b/pandas/sparse/tests/test_arithmetics.py index b5945151db678..ec8bc4d8634e6 100644 --- a/pandas/sparse/tests/test_arithmetics.py +++ b/pandas/sparse/tests/test_arithmetics.py @@ -108,6 +108,20 @@ def _check_comparison_ops(self, a, b, a_dense, b_dense): self._check_bool_result(a < b_dense) self._assert((a < b_dense).to_dense(), a_dense < b_dense) + def _check_logical_ops(self, a, b, a_dense, b_dense): + # sparse & sparse + self._check_bool_result(a & b) + self._assert((a & b).to_dense(), a_dense & b_dense) + + self._check_bool_result(a | b) + self._assert((a | b).to_dense(), a_dense | b_dense) + # sparse & dense + self._check_bool_result(a & b_dense) + self._assert((a & 
b_dense).to_dense(), a_dense & b_dense) + + self._check_bool_result(a | b_dense) + self._assert((a | b_dense).to_dense(), a_dense | b_dense) + def test_float_scalar(self): values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) @@ -305,6 +319,36 @@ def test_int_array_comparison(self): b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=2) self._check_comparison_ops(a, b, values, rvalues) + def test_bool_same_index(self): + # GH 14000 + # when sp_index are the same + for kind in ['integer', 'block']: + values = self._base([True, False, True, True], dtype=np.bool) + rvalues = self._base([True, False, True, True], dtype=np.bool) + + for fill_value in [True, False, np.nan]: + a = self._klass(values, kind=kind, dtype=np.bool, + fill_value=fill_value) + b = self._klass(rvalues, kind=kind, dtype=np.bool, + fill_value=fill_value) + self._check_logical_ops(a, b, values, rvalues) + + def test_bool_array_logical(self): + # GH 14000 + # when sp_index are the same + for kind in ['integer', 'block']: + values = self._base([True, False, True, False, True, True], + dtype=np.bool) + rvalues = self._base([True, False, False, True, False, True], + dtype=np.bool) + + for fill_value in [True, False, np.nan]: + a = self._klass(values, kind=kind, dtype=np.bool, + fill_value=fill_value) + b = self._klass(rvalues, kind=kind, dtype=np.bool, + fill_value=fill_value) + self._check_logical_ops(a, b, values, rvalues) + class TestSparseSeriesArithmetic(TestSparseArrayArithmetics): diff --git a/pandas/src/sparse.pyx b/pandas/src/sparse.pyx index 646f9126b984c..88eb4cf13815b 100644 --- a/pandas/src/sparse.pyx +++ b/pandas/src/sparse.pyx @@ -758,6 +758,7 @@ cdef class BlockUnion(BlockMerge): include "sparse_op_helper.pxi" + #------------------------------------------------------------------------------- # Indexing operations diff --git a/pandas/src/sparse_op_helper.pxi b/pandas/src/sparse_op_helper.pxi index 5ff96469195e3..8462c31c84679 100644 --- 
a/pandas/src/sparse_op_helper.pxi +++ b/pandas/src/sparse_op_helper.pxi @@ -248,20 +248,6 @@ cpdef sparse_add_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_add_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[float64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.float64) - - for i in range(len(x)): - out[i] = x[i] + y[i] - return out - - cpdef sparse_fill_add_float64(float64_t xfill, float64_t yfill): return xfill + yfill @@ -443,20 +429,6 @@ cpdef sparse_add_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_add_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[int64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.int64) - - for i in range(len(x)): - out[i] = x[i] + y[i] - return out - - cpdef sparse_fill_add_int64(int64_t xfill, int64_t yfill): return xfill + yfill @@ -638,20 +610,6 @@ cpdef sparse_sub_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_sub_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[float64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.float64) - - for i in range(len(x)): - out[i] = x[i] - y[i] - return out - - cpdef sparse_fill_sub_float64(float64_t xfill, float64_t yfill): return xfill - yfill @@ -833,20 +791,6 @@ cpdef sparse_sub_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_sub_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[int64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.int64) - - for i in range(len(x)): - out[i] = x[i] - y[i] - return out - - cpdef sparse_fill_sub_int64(int64_t xfill, int64_t yfill): 
return xfill - yfill @@ -1028,20 +972,6 @@ cpdef sparse_mul_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_mul_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[float64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.float64) - - for i in range(len(x)): - out[i] = x[i] * y[i] - return out - - cpdef sparse_fill_mul_float64(float64_t xfill, float64_t yfill): return xfill * yfill @@ -1223,20 +1153,6 @@ cpdef sparse_mul_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_mul_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[int64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.int64) - - for i in range(len(x)): - out[i] = x[i] * y[i] - return out - - cpdef sparse_fill_mul_int64(int64_t xfill, int64_t yfill): return xfill * yfill @@ -1418,20 +1334,6 @@ cpdef sparse_div_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_div_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[float64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.float64) - - for i in range(len(x)): - out[i] = __div_float64(x[i], y[i]) - return out - - cpdef sparse_fill_div_float64(float64_t xfill, float64_t yfill): return __div_float64(xfill, yfill) @@ -1613,20 +1515,6 @@ cpdef sparse_div_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_div_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[float64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.float64) - - for i in range(len(x)): - out[i] = __div_int64(x[i], y[i]) - return out - - cpdef sparse_fill_div_int64(int64_t xfill, int64_t yfill): 
return __div_int64(xfill, yfill) @@ -1808,20 +1696,6 @@ cpdef sparse_mod_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_mod_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[float64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.float64) - - for i in range(len(x)): - out[i] = __mod_float64(x[i], y[i]) - return out - - cpdef sparse_fill_mod_float64(float64_t xfill, float64_t yfill): return __mod_float64(xfill, yfill) @@ -2003,20 +1877,6 @@ cpdef sparse_mod_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_mod_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[int64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.int64) - - for i in range(len(x)): - out[i] = __mod_int64(x[i], y[i]) - return out - - cpdef sparse_fill_mod_int64(int64_t xfill, int64_t yfill): return __mod_int64(xfill, yfill) @@ -2198,20 +2058,6 @@ cpdef sparse_truediv_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_truediv_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[float64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.float64) - - for i in range(len(x)): - out[i] = __truediv_float64(x[i], y[i]) - return out - - cpdef sparse_fill_truediv_float64(float64_t xfill, float64_t yfill): return __truediv_float64(xfill, yfill) @@ -2393,20 +2239,6 @@ cpdef sparse_truediv_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_truediv_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[float64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.float64) - - for i in range(len(x)): - out[i] = 
__truediv_int64(x[i], y[i]) - return out - - cpdef sparse_fill_truediv_int64(int64_t xfill, int64_t yfill): return __truediv_int64(xfill, yfill) @@ -2588,20 +2420,6 @@ cpdef sparse_floordiv_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_floordiv_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[float64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.float64) - - for i in range(len(x)): - out[i] = __floordiv_float64(x[i], y[i]) - return out - - cpdef sparse_fill_floordiv_float64(float64_t xfill, float64_t yfill): return __floordiv_float64(xfill, yfill) @@ -2783,20 +2601,6 @@ cpdef sparse_floordiv_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_floordiv_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[int64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.int64) - - for i in range(len(x)): - out[i] = __floordiv_int64(x[i], y[i]) - return out - - cpdef sparse_fill_floordiv_int64(int64_t xfill, int64_t yfill): return __floordiv_int64(xfill, yfill) @@ -2978,20 +2782,6 @@ cpdef sparse_pow_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_pow_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[float64_t, ndim=1] out - - out = np.empty(len(x), dtype=np.float64) - - for i in range(len(x)): - out[i] = x[i] ** y[i] - return out - - cpdef sparse_fill_pow_float64(float64_t xfill, float64_t yfill): return xfill ** yfill @@ -3173,20 +2963,6 @@ cpdef sparse_pow_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_pow_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[int64_t, 
ndim=1] out - - out = np.empty(len(x), dtype=np.int64) - - for i in range(len(x)): - out[i] = x[i] ** y[i] - return out - - cpdef sparse_fill_pow_int64(int64_t xfill, int64_t yfill): return xfill ** yfill @@ -3368,20 +3144,6 @@ cpdef sparse_eq_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_eq_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[uint8_t, ndim=1] out - - out = np.empty(len(x), dtype=np.uint8) - - for i in range(len(x)): - out[i] = x[i] == y[i] - return out - - cpdef sparse_fill_eq_float64(float64_t xfill, float64_t yfill): return xfill == yfill @@ -3563,20 +3325,6 @@ cpdef sparse_eq_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_eq_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[uint8_t, ndim=1] out - - out = np.empty(len(x), dtype=np.uint8) - - for i in range(len(x)): - out[i] = x[i] == y[i] - return out - - cpdef sparse_fill_eq_int64(int64_t xfill, int64_t yfill): return xfill == yfill @@ -3758,20 +3506,6 @@ cpdef sparse_ne_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_ne_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[uint8_t, ndim=1] out - - out = np.empty(len(x), dtype=np.uint8) - - for i in range(len(x)): - out[i] = x[i] != y[i] - return out - - cpdef sparse_fill_ne_float64(float64_t xfill, float64_t yfill): return xfill != yfill @@ -3953,20 +3687,6 @@ cpdef sparse_ne_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_ne_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[uint8_t, ndim=1] out - - out = np.empty(len(x), dtype=np.uint8) - - 
for i in range(len(x)): - out[i] = x[i] != y[i] - return out - - cpdef sparse_fill_ne_int64(int64_t xfill, int64_t yfill): return xfill != yfill @@ -4148,20 +3868,6 @@ cpdef sparse_lt_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_lt_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[uint8_t, ndim=1] out - - out = np.empty(len(x), dtype=np.uint8) - - for i in range(len(x)): - out[i] = x[i] < y[i] - return out - - cpdef sparse_fill_lt_float64(float64_t xfill, float64_t yfill): return xfill < yfill @@ -4343,20 +4049,6 @@ cpdef sparse_lt_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_lt_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[uint8_t, ndim=1] out - - out = np.empty(len(x), dtype=np.uint8) - - for i in range(len(x)): - out[i] = x[i] < y[i] - return out - - cpdef sparse_fill_lt_int64(int64_t xfill, int64_t yfill): return xfill < yfill @@ -4538,20 +4230,6 @@ cpdef sparse_gt_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_gt_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[uint8_t, ndim=1] out - - out = np.empty(len(x), dtype=np.uint8) - - for i in range(len(x)): - out[i] = x[i] > y[i] - return out - - cpdef sparse_fill_gt_float64(float64_t xfill, float64_t yfill): return xfill > yfill @@ -4733,20 +4411,6 @@ cpdef sparse_gt_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_gt_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[uint8_t, ndim=1] out - - out = np.empty(len(x), dtype=np.uint8) - - for i in range(len(x)): - out[i] = x[i] > y[i] - return out - - 
cpdef sparse_fill_gt_int64(int64_t xfill, int64_t yfill): return xfill > yfill @@ -4928,20 +4592,6 @@ cpdef sparse_le_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_le_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[uint8_t, ndim=1] out - - out = np.empty(len(x), dtype=np.uint8) - - for i in range(len(x)): - out[i] = x[i] <= y[i] - return out - - cpdef sparse_fill_le_float64(float64_t xfill, float64_t yfill): return xfill <= yfill @@ -5123,20 +4773,6 @@ cpdef sparse_le_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_le_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[uint8_t, ndim=1] out - - out = np.empty(len(x), dtype=np.uint8) - - for i in range(len(x)): - out[i] = x[i] <= y[i] - return out - - cpdef sparse_fill_le_int64(int64_t xfill, int64_t yfill): return xfill <= yfill @@ -5318,20 +4954,6 @@ cpdef sparse_ge_float64(ndarray[float64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_ge_float64(ndarray[float64_t, ndim=1] x, - ndarray[float64_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - ndarray[uint8_t, ndim=1] out - - out = np.empty(len(x), dtype=np.uint8) - - for i in range(len(x)): - out[i] = x[i] >= y[i] - return out - - cpdef sparse_fill_ge_float64(float64_t xfill, float64_t yfill): return xfill >= yfill @@ -5513,20 +5135,730 @@ cpdef sparse_ge_int64(ndarray[int64_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_ge_int64(ndarray[int64_t, ndim=1] x, - ndarray[int64_t, ndim=1] y): - """ to return NumPy compat result """ +cpdef sparse_fill_ge_int64(int64_t xfill, + int64_t yfill): + return xfill >= yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_and_int64(ndarray x_, + BlockIndex xindex, + int64_t 
xfill, + ndarray y_, + BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + cdef: - Py_ssize_t i = 0 + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y ndarray[uint8_t, ndim=1] out - out = np.empty(len(x), dtype=np.uint8) + # to suppress Cython warning + x = x_ + y = y_ - for i in range(len(x)): - out[i] = x[i] >= y[i] - return out + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + # Wow, what a hack job. Need to do something about this -cpdef sparse_fill_ge_int64(int64_t xfill, - int64_t yfill): - return xfill >= yfill + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] & yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill & y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] & y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] & yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill & y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == 
yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill & yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_and_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill & y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] & yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] & y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] & yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill & y[yi] + yi += 1 + + return out, out_index, xfill & yfill + + +cpdef sparse_and_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_and_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_and_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef 
sparse_fill_and_int64(int64_t xfill, + int64_t yfill): + return xfill & yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_and_uint8(ndarray x_, + BlockIndex xindex, + uint8_t xfill, + ndarray y_, + BlockIndex yindex, + uint8_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[uint8_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] & yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill & y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] & y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] & yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill & y[yi] + yi += 1 + + # advance y location + ybp += 1 
+ if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill & yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_and_uint8(ndarray x_, IntIndex xindex, + uint8_t xfill, + ndarray y_, IntIndex yindex, + uint8_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[uint8_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill & y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] & yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] & y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] & yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill & y[yi] + yi += 1 + + return out, out_index, xfill & yfill + + +cpdef sparse_and_uint8(ndarray[uint8_t, ndim=1] x, + SparseIndex xindex, uint8_t xfill, + ndarray[uint8_t, ndim=1] y, + SparseIndex yindex, uint8_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_and_uint8(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_and_uint8(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef 
sparse_fill_and_uint8(uint8_t xfill, + uint8_t yfill): + return xfill & yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_or_int64(ndarray x_, + BlockIndex xindex, + int64_t xfill, + ndarray y_, + BlockIndex yindex, + int64_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] | yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill | y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] | y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] | yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill | y[yi] + yi += 1 + + # advance y location + ybp += 1 + 
if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill | yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_or_int64(ndarray x_, IntIndex xindex, + int64_t xfill, + ndarray y_, IntIndex yindex, + int64_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[int64_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill | y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] | yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] | y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] | yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill | y[yi] + yi += 1 + + return out, out_index, xfill | yfill + + +cpdef sparse_or_int64(ndarray[int64_t, ndim=1] x, + SparseIndex xindex, int64_t xfill, + ndarray[int64_t, ndim=1] y, + SparseIndex yindex, int64_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_or_int64(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_or_int64(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef 
sparse_fill_or_int64(int64_t xfill, + int64_t yfill): + return xfill | yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_or_uint8(ndarray x_, + BlockIndex xindex, + uint8_t xfill, + ndarray y_, + BlockIndex yindex, + uint8_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + ndarray[uint8_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = x[xi] | yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = xfill | y[yi] + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] | y[yi] + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] | yfill + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = xfill | y[yi] + yi += 1 + + # advance y location + ybp += 1 + 
if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, xfill | yfill + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_or_uint8(ndarray x_, IntIndex xindex, + uint8_t xfill, + ndarray y_, IntIndex yindex, + uint8_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + ndarray[int32_t, ndim=1] xindices, yindices, out_indices + ndarray[uint8_t, ndim=1] x, y + ndarray[uint8_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.uint8) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... + for out_i from 0 <= out_i < out_index.npoints: + if xi == xindex.npoints: + # use x fill value + out[out_i] = xfill | y[yi] + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = x[xi] | yfill + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = x[xi] | y[yi] + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = x[xi] | yfill + xi += 1 + else: + # use x fill value + out[out_i] = xfill | y[yi] + yi += 1 + + return out, out_index, xfill | yfill + + +cpdef sparse_or_uint8(ndarray[uint8_t, ndim=1] x, + SparseIndex xindex, uint8_t xfill, + ndarray[uint8_t, ndim=1] y, + SparseIndex yindex, uint8_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_or_uint8(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_or_uint8(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef 
sparse_fill_or_uint8(uint8_t xfill, + uint8_t yfill): + return xfill | yfill diff --git a/pandas/src/sparse_op_helper.pxi.in b/pandas/src/sparse_op_helper.pxi.in index 1a0e1aa0250f6..d1d9a6f02a72c 100644 --- a/pandas/src/sparse_op_helper.pxi.in +++ b/pandas/src/sparse_op_helper.pxi.in @@ -90,8 +90,12 @@ cdef inline {{dtype}}_t __mod_{{dtype}}({{dtype}}_t a, {{dtype}}_t b): {{py: -# dtype -dtypes = ['float64', 'int64'] +# dtype, arith_comp_group, logical_group +dtypes = [('float64', True, False), + ('int64', True, True), + ('uint8', False, True)] +# uint8 is excluded from the arithmetic / comparison templates; +# those ops should be implemented with fused types instead def get_op(tup): assert isinstance(tup, tuple) @@ -112,7 +116,10 @@ def get_op(tup): 'lt': '{0} < {1}', 'gt': '{0} > {1}', 'le': '{0} <= {1}', - 'ge': '{0} >= {1}'} + 'ge': '{0} >= {1}', + + 'and': '{0} & {1}', # logical op + 'or': '{0} | {1}'} return ops_dict[opname].format(lval, rval, dtype) @@ -120,19 +127,30 @@ def get_op(tup): def get_dispatch(dtypes): ops_list = ['add', 'sub', 'mul', 'div', 'mod', 'truediv', - 'floordiv', 'pow', 'eq', 'ne', 'lt', 'gt', 'le', 'ge'] + 'floordiv', 'pow', + 'eq', 'ne', 'lt', 'gt', 'le', 'ge', + 'and', 'or'] for opname in ops_list: - for dtype in dtypes: + for dtype, arith_comp_group, logical_group in dtypes: if opname in ('div', 'truediv'): rdtype = 'float64' elif opname in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'): + # comparison op + rdtype = 'uint8' + elif opname in ('and', 'or'): + # logical op rdtype = 'uint8' else: rdtype = dtype - yield opname, dtype, rdtype + if opname in ('and', 'or'): + if logical_group: + yield opname, dtype, rdtype + else: + if arith_comp_group: + yield opname, dtype, rdtype }} @@ -316,20 +334,6 @@ cpdef sparse_{{opname}}_{{dtype}}(ndarray[{{dtype}}_t, ndim=1] x, raise NotImplementedError -cpdef sparse_align_{{opname}}_{{dtype}}(ndarray[{{dtype}}_t, ndim=1] x, - ndarray[{{dtype}}_t, ndim=1] y): - """ to return NumPy compat result """ - cdef: - Py_ssize_t i = 0 - 
ndarray[{{rdtype}}_t, ndim=1] out - - out = np.empty(len(x), dtype=np.{{rdtype}}) - - for i in range(len(x)): - out[i] = {{(opname, 'x[i]', 'y[i]', dtype) | get_op}} - return out - - cpdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill, {{dtype}}_t yfill): return {{(opname, 'xfill', 'yfill', dtype) | get_op}}