-
Notifications
You must be signed in to change notification settings - Fork 382
Commit be628d3
committed
Auto merge of #130002 - orlp:better-div-floor-ceil, r=thomcc
better implementation of signed div_floor/ceil
Tracking issue for signed `div_floor`/`div_ceil`: rust-lang/rust#88581.
This PR improves the implementation of those two functions by adding a better branchless algorithm. Side-by-side comparison of `i32::div_floor` on x86-64:
```asm
div_floor_new:                          div_floor_old:
    push rax                                push rax
    test esi, esi                           test esi, esi
    je .LBB0_3                              je .LBB1_6
    mov eax, esi                            mov eax, esi
    not eax                                 not eax
    lea ecx, [rdi - 2147483648]             lea ecx, [rdi - 2147483648]
    or ecx, eax                             or ecx, eax
    je .LBB0_2                              je .LBB1_7
    mov eax, edi                            mov eax, edi
    cdq                                     cdq
    idiv esi                                idiv esi
    xor esi, edi                            test edx, edx
    sar esi, 31                             setg cl
    test edx, edx                           test esi, esi
    cmove esi, edx                          sets dil
    add eax, esi                            test dil, cl
    pop rcx                                 jne .LBB1_4
    ret                                     test edx, edx
.LBB0_3:                                    setns cl
    lea rdi, [rip + .L__unnamed_1]          test esi, esi
    call qword ptr [rip + panic...]         setle dl
.LBB0_2:                                    or dl, cl
    lea rdi, [rip + .L__unnamed_1]          jne .LBB1_5
    call qword ptr [rip + panic...]     .LBB1_4:
                                            dec eax
                                        .LBB1_5:
                                            pop rcx
                                            ret
                                        .LBB1_6:
                                            lea rdi, [rip + .L__unnamed_2]
                                            call qword ptr [rip + panic...]
                                        .LBB1_7:
                                            lea rdi, [rip + .L__unnamed_2]
                                            call qword ptr [rip + panic...]
```
And on AArch64:
```asm
_div_floor_new:                         _div_floor_old:
    stp x29, x30, [sp, #-16]!               stp x29, x30, [sp, #-16]!
    mov x29, sp                             mov x29, sp
    cbz w1, LBB0_4                          cbz w1, LBB1_9
    mov w8, #-2147483648                    mov x8, x0
    cmp w0, w8                              mov w9, #-2147483648
    b.ne LBB0_3                             cmp w0, w9
    cmn w1, #1                              b.ne LBB1_3
    b.eq LBB0_5                             cmn w1, #1
LBB0_3:                                     b.eq LBB1_10
    sdiv w8, w0, w1                     LBB1_3:
    msub w9, w8, w1, w0                     sdiv w0, w8, w1
    eor w10, w1, w0                         msub w8, w0, w1, w8
    asr w10, w10, #31                       tbz w1, #31, LBB1_5
    cmp w9, #0                              cmp w8, #0
    csel w9, wzr, w10, eq                   b.gt LBB1_7
    add w0, w9, w8                      LBB1_5:
    ldp x29, x30, [sp], #16                 cmp w1, #1
    ret                                     b.lt LBB1_8
LBB0_4:                                     tbz w8, #31, LBB1_8
    adrp x0, l___unnamed_1@PAGE         LBB1_7:
    add x0, x0, l___unnamed_1@PAGEOFF       sub w0, w0, #1
    bl panic...                         LBB1_8:
LBB0_5:                                     ldp x29, x30, [sp], #16
    adrp x0, l___unnamed_1@PAGE             ret
    add x0, x0, l___unnamed_1@PAGEOFF   LBB1_9:
    bl panic...                             adrp x0, l___unnamed_2@PAGE
                                            add x0, x0, l___unnamed_2@PAGEOFF
                                            bl panic...
                                        LBB1_10:
                                            adrp x0, l___unnamed_2@PAGE
                                            add x0, x0, l___unnamed_2@PAGEOFF
                                            bl panic...
```
File tree
0 file changed
+0
-0
lines changed
Filter options
0 file changed
+0
-0
lines changed
0 commit comments