Skip to content

Commit 1d15e4e

Browse files
committed
Construct signed division functions differently
1 parent e9688c6 commit 1d15e4e

File tree

6 files changed

+221
-329
lines changed

6 files changed

+221
-329
lines changed

src/int/sdiv.rs

Lines changed: 154 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,166 @@
1-
use int::specialized_div_rem::*;
1+
use int::udiv::*;
22

3-
intrinsics! {
4-
#[maybe_use_optimized_c_shim]
5-
#[arm_aeabi_alias = __aeabi_idiv]
6-
/// Returns `n / d`
7-
pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 {
8-
i32_div_rem(a, b).0
9-
}
10-
11-
#[maybe_use_optimized_c_shim]
12-
/// Returns `n % d`
13-
pub extern "C" fn __modsi3(a: i32, b: i32) -> i32 {
14-
i32_div_rem(a, b).1
15-
}
16-
17-
#[maybe_use_optimized_c_shim]
18-
/// Returns `n / d` and sets `*rem = n % d`
19-
pub extern "C" fn __divmodsi4(a: i32, b: i32, rem: &mut i32) -> i32 {
20-
let quo_rem = i32_div_rem(a, b);
21-
*rem = quo_rem.1;
22-
quo_rem.0
3+
macro_rules! sdivmod {
4+
(
5+
$unsigned_fn:ident, // name of the unsigned division function
6+
$signed_fn:ident, // name of the signed division function
7+
$uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_fn`
8+
$iX:ident, // signed integer type for the inputs and outputs of `$signed_fn`
9+
$($attr:tt),* // attributes
10+
) => {
11+
intrinsics! {
12+
$(
13+
#[$attr]
14+
)*
15+
/// Returns `n / d` and sets `*rem = n % d`
16+
pub extern "C" fn $signed_fn(a: $iX, b: $iX, rem: &mut $iX) -> $iX {
17+
let a_neg = a < 0;
18+
let b_neg = b < 0;
19+
let mut a = a;
20+
let mut b = b;
21+
if a_neg {
22+
a = a.wrapping_neg();
23+
}
24+
if b_neg {
25+
b = b.wrapping_neg();
26+
}
27+
let mut r = *rem as $uX;
28+
let t = $unsigned_fn(a as $uX, b as $uX, Some(&mut r)) as $iX;
29+
let mut r = r as $iX;
30+
if a_neg {
31+
r = r.wrapping_neg();
32+
}
33+
*rem = r;
34+
if a_neg != b_neg {
35+
t.wrapping_neg()
36+
} else {
37+
t
38+
}
39+
}
40+
}
2341
}
42+
}
2443

25-
#[maybe_use_optimized_c_shim]
26-
/// Returns `n / d`
27-
pub extern "C" fn __divdi3(a: i64, b: i64) -> i64 {
28-
i64_div_rem(a, b).0
44+
macro_rules! sdiv {
45+
(
46+
$unsigned_fn:ident, // name of the unsigned division function
47+
$signed_fn:ident, // name of the signed division function
48+
$uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_fn`
49+
$iX:ident, // signed integer type for the inputs and outputs of `$signed_fn`
50+
$($attr:tt),* // attributes
51+
) => {
52+
intrinsics! {
53+
$(
54+
#[$attr]
55+
)*
56+
/// Returns `n / d`
57+
pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX {
58+
let a_neg = a < 0;
59+
let b_neg = b < 0;
60+
let mut a = a;
61+
let mut b = b;
62+
if a_neg {
63+
a = a.wrapping_neg();
64+
}
65+
if b_neg {
66+
b = b.wrapping_neg();
67+
}
68+
let t = $unsigned_fn(a as $uX, b as $uX) as $iX;
69+
if a_neg != b_neg {
70+
t.wrapping_neg()
71+
} else {
72+
t
73+
}
74+
}
75+
}
2976
}
77+
}
3078

31-
#[maybe_use_optimized_c_shim]
32-
/// Returns `n % d`
33-
pub extern "C" fn __moddi3(a: i64, b: i64) -> i64 {
34-
i64_div_rem(a, b).1
79+
macro_rules! smod {
80+
(
81+
$unsigned_fn:ident, // name of the unsigned division function
82+
$signed_fn:ident, // name of the signed division function
83+
$uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_fn`
84+
$iX:ident, // signed integer type for the inputs and outputs of `$signed_fn`
85+
$($attr:tt),* // attributes
86+
) => {
87+
intrinsics! {
88+
$(
89+
#[$attr]
90+
)*
91+
/// Returns `n % d`
92+
pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX {
93+
let a_neg = a < 0;
94+
let b_neg = b < 0;
95+
let mut a = a;
96+
let mut b = b;
97+
if a_neg {
98+
a = a.wrapping_neg();
99+
}
100+
if b_neg {
101+
b = b.wrapping_neg();
102+
}
103+
let r = $unsigned_fn(a as $uX, b as $uX) as $iX;
104+
if a_neg {
105+
r.wrapping_neg()
106+
} else {
107+
r
108+
}
109+
}
110+
}
35111
}
112+
}
36113

114+
sdivmod!(
115+
__udivmodsi4,
116+
__divmodsi4,
117+
u32,
118+
i32,
119+
maybe_use_optimized_c_shim
120+
);
121+
// The `#[arm_aeabi_alias = __aeabi_idiv]` attribute cannot be made to work with `intrinsics!` inside a macro, so `__divsi3` is written out by hand here instead of being generated by `sdiv!`
122+
intrinsics! {
37123
#[maybe_use_optimized_c_shim]
38-
/// Returns `n / d` and sets `*rem = n % d`
39-
pub extern "C" fn __divmoddi4(a: i64, b: i64, rem: &mut i64) -> i64 {
40-
let quo_rem = i64_div_rem(a, b);
41-
*rem = quo_rem.1;
42-
quo_rem.0
43-
}
44-
45-
#[win64_128bit_abi_hack]
124+
#[arm_aeabi_alias = __aeabi_idiv]
46125
/// Returns `n / d`
47-
pub extern "C" fn __divti3(a: i128, b: i128) -> i128 {
48-
i128_div_rem(a, b).0
126+
pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 {
127+
let a_neg = a < 0;
128+
let b_neg = b < 0;
129+
let mut a = a;
130+
let mut b = b;
131+
if a_neg {
132+
a = a.wrapping_neg();
133+
}
134+
if b_neg {
135+
b = b.wrapping_neg();
136+
}
137+
let t = __udivsi3(a as u32, b as u32) as i32;
138+
if a_neg != b_neg {
139+
t.wrapping_neg()
140+
} else {
141+
t
142+
}
49143
}
144+
}
145+
smod!(__umodsi3, __modsi3, u32, i32, maybe_use_optimized_c_shim);
50146

51-
#[win64_128bit_abi_hack]
52-
/// Returns `n % d`
53-
pub extern "C" fn __modti3(a: i128, b: i128) -> i128 {
54-
i128_div_rem(a, b).1
55-
}
147+
sdivmod!(
148+
__udivmoddi4,
149+
__divmoddi4,
150+
u64,
151+
i64,
152+
maybe_use_optimized_c_shim
153+
);
154+
sdiv!(__udivdi3, __divdi3, u64, i64, maybe_use_optimized_c_shim);
155+
smod!(__umoddi3, __moddi3, u64, i64, maybe_use_optimized_c_shim);
56156

57-
// LLVM does not currently have a `__divmodti4` function, but GCC does
58-
#[maybe_use_optimized_c_shim]
59-
/// Returns `n / d` and sets `*rem = n % d`
60-
pub extern "C" fn __divmodti4(a: i128, b: i128, rem: &mut i128) -> i128 {
61-
let quo_rem = i128_div_rem(a, b);
62-
*rem = quo_rem.1;
63-
quo_rem.0
64-
}
65-
}
157+
// LLVM does not currently have a `__divmodti4` function, but GCC does
158+
sdivmod!(
159+
__udivmodti4,
160+
__divmodti4,
161+
u128,
162+
i128,
163+
maybe_use_optimized_c_shim
164+
);
165+
sdiv!(__udivti3, __divti3, u128, i128, win64_128bit_abi_hack);
166+
smod!(__umodti3, __modti3, u128, i128, win64_128bit_abi_hack);
Lines changed: 9 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,23 @@
1-
/// Creates unsigned and signed division functions optimized for dividing integers with the same
1+
/// Creates an unsigned division function optimized for dividing integers with the same
22
/// bitwidth as the largest operand in an asymmetrically sized division. For example, x86-64 has an
33
/// assembly instruction that can divide a 128 bit integer by a 64 bit integer if the quotient fits
44
/// in 64 bits. The 128 bit version of this algorithm would use that fast hardware division to
55
/// construct a full 128 bit by 128 bit division.
66
#[macro_export]
77
macro_rules! impl_asymmetric {
88
(
9-
$unsigned_name:ident, // name of the unsigned division function
10-
$signed_name:ident, // name of the signed division function
9+
$fn:ident, // name of the unsigned division function
1110
$zero_div_fn:ident, // function called when division by zero is attempted
1211
$half_division:ident, // function for division of a $uX by a $uX
1312
$asymmetric_division:ident, // function for division of a $uD by a $uX
1413
$n_h:expr, // the number of bits in a $iH or $uH
1514
$uH:ident, // unsigned integer with half the bit width of $uX
1615
$uX:ident, // unsigned integer with half the bit width of $uD
17-
$uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
18-
$iD:ident, // signed integer type for the inputs and outputs of `$signed_name`
19-
$($unsigned_attr:meta),*; // attributes for the unsigned function
20-
$($signed_attr:meta),* // attributes for the signed function
16+
$uD:ident // unsigned integer type for the inputs and outputs of `$fn`
2117
) => {
2218
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
2319
/// tuple.
24-
$(
25-
#[$unsigned_attr]
26-
)*
27-
pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD,$uD) {
20+
pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
2821
let n: u32 = $n_h * 2;
2922

3023
let duo_lo = duo as $uX;
@@ -38,14 +31,14 @@ macro_rules! impl_asymmetric {
3831
if duo_hi < div_lo {
3932
// `$uD` by `$uX` division with a quotient that will fit into a `$uX`
4033
let (quo, rem) = unsafe { $asymmetric_division(duo, div_lo) };
41-
return (quo as $uD, rem as $uD)
34+
return (quo as $uD, rem as $uD);
4235
} else {
4336
// Short division using the $uD by $uX division
4437
let (quo_hi, rem_hi) = $half_division(duo_hi, div_lo);
4538
let tmp = unsafe {
4639
$asymmetric_division((duo_lo as $uD) | ((rem_hi as $uD) << n), div_lo)
4740
};
48-
return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD)
41+
return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD);
4942
}
5043
}
5144

@@ -57,9 +50,7 @@ macro_rules! impl_asymmetric {
5750
let div_lz = div_hi.leading_zeros();
5851
let div_extra = n - div_lz;
5952
let div_sig_n = (div >> div_extra) as $uX;
60-
let tmp = unsafe {
61-
$asymmetric_division(duo >> 1, div_sig_n)
62-
};
53+
let tmp = unsafe { $asymmetric_division(duo >> 1, div_sig_n) };
6354

6455
let mut quo = tmp.0 >> ((n - 1) - div_lz);
6556
if quo != 0 {
@@ -72,33 +63,7 @@ macro_rules! impl_asymmetric {
7263
quo += 1;
7364
rem -= div;
7465
}
75-
return (quo as $uD, rem)
66+
return (quo as $uD, rem);
7667
}
77-
78-
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
79-
/// tuple.
80-
$(
81-
#[$signed_attr]
82-
)*
83-
pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) {
84-
match (duo < 0, div < 0) {
85-
(false, false) => {
86-
let t = $unsigned_name(duo as $uD, div as $uD);
87-
(t.0 as $iD, t.1 as $iD)
88-
},
89-
(true, false) => {
90-
let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD);
91-
((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg())
92-
},
93-
(false, true) => {
94-
let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD);
95-
((t.0 as $iD).wrapping_neg(), t.1 as $iD)
96-
},
97-
(true, true) => {
98-
let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD);
99-
(t.0 as $iD, (t.1 as $iD).wrapping_neg())
100-
},
101-
}
102-
}
103-
}
68+
};
10469
}

0 commit comments

Comments
 (0)