Skip to content

Commit 0a75b91

Browse files
committed
aligned and unaligned comparison loops
1 parent 707d5d6 commit 0a75b91

File tree

2 files changed

+53
-4
lines changed

2 files changed

+53
-4
lines changed

quaddtype/README.md

+7-2
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,14 @@ pip install -i https://test.pypi.org/simple/ quaddtype
99

1010
## Usage
1111

12-
```
12+
```python
1313
import numpy as np
14-
from quaddtype import QuadPrecDType, QuadPrecision
14+
from numpy_quaddtype import QuadPrecDType, QuadPrecision
1515

16+
# using sleef backend (default)
1617
np.array([1,2,3], dtype=QuadPrecDType())
18+
np.array([1,2,3], dtype=QuadPrecDType("sleef"))
19+
20+
# using longdouble backend
21+
np.array([1,2,3], dtype=QuadPrecDType("longdouble"))
1722
```

quaddtype/numpy_quaddtype/src/umath.cpp

+46-2
Original file line number | Diff line number | Diff line change
@@ -633,7 +633,51 @@ quad_generic_comp_strided_loop(PyArrayMethod_Context *context, char *const data[
633633
result = ld_comp(&in1.longdouble_value, &in2.longdouble_value);
634634
}
635635

636-
*((npy_bool *)out_ptr) = result;
636+
memcpy(out_ptr, &result, sizeof(npy_bool));
637+
638+
in1_ptr += in1_stride;
639+
in2_ptr += in2_stride;
640+
out_ptr += out_stride;
641+
}
642+
return 0;
643+
}
644+
645+
646+
template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp>
647+
int
648+
quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[],
649+
npy_intp const dimensions[], npy_intp const strides[],
650+
NpyAuxData *auxdata)
651+
{
652+
npy_intp N = dimensions[0];
653+
char *in1_ptr = data[0], *in2_ptr = data[1];
654+
char *out_ptr = data[2];
655+
npy_intp in1_stride = strides[0];
656+
npy_intp in2_stride = strides[1];
657+
npy_intp out_stride = strides[2];
658+
659+
QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[0];
660+
QuadBackendType backend = descr->backend;
661+
size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double);
662+
663+
quad_value in1, in2;
664+
while (N--)
665+
{
666+
quad_value in1 = *(quad_value *)in1_ptr;
667+
quad_value in2 = *(quad_value *)in2_ptr;
668+
669+
npy_bool result;
670+
671+
if (backend == BACKEND_SLEEF)
672+
{
673+
result = sleef_comp(&in1.sleef_value, &in2.sleef_value);
674+
}
675+
else
676+
{
677+
result = ld_comp(&in1.longdouble_value, &in2.longdouble_value);
678+
}
679+
680+
*(npy_bool *)out_ptr = result;
637681

638682
in1_ptr += in1_stride;
639683
in2_ptr += in2_stride;
@@ -670,7 +714,7 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name)
670714

671715
PyType_Slot slots[] = {
672716
{NPY_METH_resolve_descriptors, (void *)&quad_comparison_op_resolve_descriptors},
673-
{NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
717+
{NPY_METH_strided_loop, (void *)&quad_generic_comp_strided_loop_aligned<sleef_comp, ld_comp>},
674718
{NPY_METH_unaligned_strided_loop,
675719
(void *)&quad_generic_comp_strided_loop<sleef_comp, ld_comp>},
676720
{0, NULL}};

0 commit comments

Comments
 (0)