Skip to content

Commit 245e8da

Browse files
committed
Refactor integer to float conversion
Extract some common routines to separate functions in order to deduplicate code and remove some of the magic.
1 parent 9d3d5af commit 245e8da

File tree

3 files changed

+131
-47
lines changed

3 files changed

+131
-47
lines changed

src/float/conv.rs

+120-44
Original file line numberDiff line numberDiff line change
@@ -9,70 +9,152 @@ use super::Float;
99
/// These are hand-optimized bit twiddling code,
1010
/// which unfortunately isn't the easiest kind of code to read.
1111
///
12-
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>
12+
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
13+
/// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros)
14+
/// - Calculate a base mantissa by shifting the integer into mantissa position
15+
/// - Figure out if rounding needs to occour by classifying truncated bits. Some patterns apply
16+
/// here, so they may be "squashed" into smaller numbers to spmplifiy the classification.
1317
mod int_to_float {
18+
use super::*;
19+
20+
/// Calculate the exponent from the number of leading zeros.
21+
fn exp<I: Int, F: Float>(n: u32) -> F::Int
22+
where
23+
F::Int: CastFrom<u32>,
24+
{
25+
F::Int::cast_from(I::BITS + F::EXPONENT_BIAS - 2 - n)
26+
}
27+
28+
/// Shift the integer into the float's mantissa bits. Keep the lowest exponent bit intact.
29+
fn m_base<I: Int, F: Float>(i_m: I) -> F::Int
30+
where
31+
F::Int: CastFrom<I>,
32+
{
33+
F::Int::cast_from(i_m >> ((I::BITS - F::BITS) + F::EXPONENT_BITS))
34+
}
35+
36+
/// Calculate the mantissa in cases where the float size is greater than integer size
37+
fn m_f_gt_i<I: Int, F: Float>(i: I, n: u32) -> F::Int
38+
where
39+
F::Int: CastFrom<I>,
40+
{
41+
F::Int::cast_from(i) << (F::SIGNIFICAND_BITS - I::BITS + 1 + n)
42+
}
43+
44+
/// Calculate the mantissa and a dropped bit adjustment when `f` and `i` are equal sizes
45+
fn m_f_eq_i<I, F: Float<Int = I>>(i: I, n: u32) -> (F::Int, F::Int)
46+
where
47+
I: Int + CastInto<F::Int>,
48+
{
49+
let base = (i << n) >> F::EXPONENT_BITS;
50+
51+
// Only the lowest `F::EXPONENT_BITS` bits will be truncated. Shift them
52+
// to the highest position
53+
let adj = (i << n) << (F::SIGNIFICAND_BITS + 1);
54+
55+
(base, adj)
56+
}
57+
58+
/// Adjust a mantissa with dropped bits
59+
fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
60+
// Branchlessly extract a `1` if rounding up should happen
61+
let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1);
62+
63+
// Add one when we need to round up. Break ties to even.
64+
m_base + adj
65+
}
66+
67+
/// Combine a final float repr from an exponent and mantissa.
68+
fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
69+
// + rather than | so the mantissa can overflow into the exponent
70+
(e << F::SIGNIFICAND_BITS) + m
71+
}
72+
73+
/// Perform a signed operation as unsigned, then add the sign back
74+
pub fn signed<I, F, Conv>(i: I, conv: Conv) -> F
75+
where
76+
F: Float,
77+
I: Int,
78+
F::Int: CastFrom<I>,
79+
Conv: Fn(I::UnsignedInt) -> F::Int,
80+
{
81+
let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1);
82+
F::from_repr(conv(i.unsigned_abs()) | sign_bit)
83+
}
84+
1485
pub fn u32_to_f32_bits(i: u32) -> u32 {
1586
if i == 0 {
1687
return 0;
1788
}
1889
let n = i.leading_zeros();
19-
let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact.
20-
let b = (i << n) << 24; // Insignificant bits, only relevant for rounding.
21-
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
22-
let e = 157 - n; // Exponent plus 127, minus one.
23-
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
90+
let (m_base, adj) = m_f_eq_i::<u32, f32>(i, n);
91+
let m = m_adj::<f32>(m_base, adj);
92+
let e = exp::<u32, f32>(n);
93+
repr::<f32>(e, m)
2494
}
2595

2696
pub fn u32_to_f64_bits(i: u32) -> u64 {
2797
if i == 0 {
2898
return 0;
2999
}
30100
let n = i.leading_zeros();
31-
let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact.
32-
let e = 1053 - n as u64; // Exponent plus 1023, minus one.
33-
(e << 52) + m // Bit 53 of m will overflow into e.
101+
let m = m_f_gt_i::<_, f64>(i, n);
102+
let e = exp::<u32, f64>(n);
103+
repr::<f64>(e, m)
34104
}
35105

36106
pub fn u64_to_f32_bits(i: u64) -> u32 {
37107
let n = i.leading_zeros();
38-
let y = i.wrapping_shl(n);
39-
let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact.
40-
let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding.
41-
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
42-
let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero.
43-
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
108+
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
109+
let m_base = m_base::<_, f32>(i_m);
110+
// The entire lower half of `i` will be truncated (masked portion), plus the
111+
// next `EXPONENT_BITS` bits.
112+
let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
113+
let m = m_adj::<f32>(m_base, adj);
114+
let e = if i == 0 { 0 } else { exp::<u64, f32>(n) };
115+
repr::<f32>(e, m)
44116
}
45117

46118
pub fn u64_to_f64_bits(i: u64) -> u64 {
47119
if i == 0 {
48120
return 0;
49121
}
50122
let n = i.leading_zeros();
51-
let a = (i << n) >> 11; // Significant bits, with bit 53 still in tact.
52-
let b = (i << n) << 53; // Insignificant bits, only relevant for rounding.
53-
let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
54-
let e = 1085 - n as u64; // Exponent plus 1023, minus one.
55-
(e << 52) + m // + not |, so the mantissa can overflow into the exponent.
123+
let (m_base, adj) = m_f_eq_i::<u64, f64>(i, n);
124+
let m = m_adj::<f64>(m_base, adj);
125+
let e = exp::<u64, f64>(n);
126+
repr::<f64>(e, m)
56127
}
57128

58129
pub fn u128_to_f32_bits(i: u128) -> u32 {
59130
let n = i.leading_zeros();
60-
let y = i.wrapping_shl(n);
61-
let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact.
62-
let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding.
63-
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
64-
let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero.
65-
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
131+
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
132+
let m_base = m_base::<_, f32>(i_m);
133+
134+
// Within the upper `F::BITS`, everything except for the signifcand
135+
// gets truncated
136+
let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast();
137+
138+
// The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
139+
// check if it is nonzero.
140+
let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into();
141+
let adj = d1 | d2;
142+
143+
let m = m_adj::<f32>(m_base, adj);
144+
let e = if i == 0 { 0 } else { exp::<u128, f32>(n) };
145+
repr::<f32>(e, m)
66146
}
67147

68148
pub fn u128_to_f64_bits(i: u128) -> u64 {
69149
let n = i.leading_zeros();
70-
let y = i.wrapping_shl(n);
71-
let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact.
72-
let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding.
73-
let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
74-
let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero.
75-
(e << 52) + m // + not |, so the mantissa can overflow into the exponent.
150+
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
151+
let m_base = m_base::<_, f64>(i_m);
152+
// The entire lower half of `i` will be truncated (masked portion), plus the
153+
// next `EXPONENT_BITS` bits.
154+
let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
155+
let m = m_adj::<f64>(m_base, adj);
156+
let e = if i == 0 { 0 } else { exp::<u128, f64>(n) };
157+
repr::<f64>(e, m)
76158
}
77159
}
78160

@@ -113,38 +195,32 @@ intrinsics! {
113195
intrinsics! {
114196
#[arm_aeabi_alias = __aeabi_i2f]
115197
pub extern "C" fn __floatsisf(i: i32) -> f32 {
116-
let sign_bit = ((i >> 31) as u32) << 31;
117-
f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit)
198+
int_to_float::signed(i, int_to_float::u32_to_f32_bits)
118199
}
119200

120201
#[arm_aeabi_alias = __aeabi_i2d]
121202
pub extern "C" fn __floatsidf(i: i32) -> f64 {
122-
let sign_bit = ((i >> 31) as u64) << 63;
123-
f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit)
203+
int_to_float::signed(i, int_to_float::u32_to_f64_bits)
124204
}
125205

126206
#[arm_aeabi_alias = __aeabi_l2f]
127207
pub extern "C" fn __floatdisf(i: i64) -> f32 {
128-
let sign_bit = ((i >> 63) as u32) << 31;
129-
f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit)
208+
int_to_float::signed(i, int_to_float::u64_to_f32_bits)
130209
}
131210

132211
#[arm_aeabi_alias = __aeabi_l2d]
133212
pub extern "C" fn __floatdidf(i: i64) -> f64 {
134-
let sign_bit = ((i >> 63) as u64) << 63;
135-
f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit)
213+
int_to_float::signed(i, int_to_float::u64_to_f64_bits)
136214
}
137215

138216
#[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
139217
pub extern "C" fn __floattisf(i: i128) -> f32 {
140-
let sign_bit = ((i >> 127) as u32) << 31;
141-
f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit)
218+
int_to_float::signed(i, int_to_float::u128_to_f32_bits)
142219
}
143220

144221
#[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
145222
pub extern "C" fn __floattidf(i: i128) -> f64 {
146-
let sign_bit = ((i >> 127) as u64) << 63;
147-
f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit)
223+
int_to_float::signed(i, int_to_float::u128_to_f64_bits)
148224
}
149225
}
150226

src/float/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ pub(crate) trait Float:
3131
+ ops::Rem<Output = Self>
3232
{
3333
/// A uint of the same width as the float
34-
type Int: Int;
34+
type Int: Int<UnsignedInt = Self::Int, OtherSign = Self::SignedInt>;
3535

3636
/// A int of the same width as the float
37-
type SignedInt: Int;
37+
type SignedInt: Int<UnsignedInt = Self::Int, OtherSign = Self::Int>;
3838

3939
/// An int capable of containing the exponent bits plus a sign bit. This is signed.
4040
type ExpInt: Int;

src/int/mod.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ pub(crate) trait Int: MinInt
8282

8383
fn unsigned(self) -> Self::UnsignedInt;
8484
fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
85+
fn unsigned_abs(self) -> Self::UnsignedInt;
8586

8687
fn from_bool(b: bool) -> Self;
8788

@@ -177,7 +178,6 @@ macro_rules! int_impl_common {
177178
fn wrapping_mul(self, other: Self) -> Self {
178179
<Self>::wrapping_mul(self, other)
179180
}
180-
181181
fn wrapping_sub(self, other: Self) -> Self {
182182
<Self>::wrapping_sub(self, other)
183183
}
@@ -234,6 +234,10 @@ macro_rules! int_impl {
234234
me
235235
}
236236

237+
fn unsigned_abs(self) -> Self {
238+
self
239+
}
240+
237241
fn abs_diff(self, other: Self) -> Self {
238242
if self < other {
239243
other.wrapping_sub(self)
@@ -267,6 +271,10 @@ macro_rules! int_impl {
267271
me as $ity
268272
}
269273

274+
fn unsigned_abs(self) -> Self::UnsignedInt {
275+
self.unsigned_abs()
276+
}
277+
270278
fn abs_diff(self, other: Self) -> $uty {
271279
self.wrapping_sub(other).wrapping_abs() as $uty
272280
}

0 commit comments

Comments
 (0)