Skip to content

Commit 7dee0a0

Browse files
committed
Refactor integer to float conversion
Extract some common routines to separate functions in order to deduplicate code and remove some of the magic.
1 parent 0bf4f0f commit 7dee0a0

File tree

3 files changed

+138
-48
lines changed

3 files changed

+138
-48
lines changed

src/float/conv.rs

+128-46
Original file line numberDiff line numberDiff line change
@@ -6,73 +6,161 @@ use super::Float;
66

77
/// Conversions from integers to floats.
88
///
9-
/// These are hand-optimized bit twiddling code,
10-
/// which unfortunately isn't the easiest kind of code to read.
9+
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
10+
/// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a
11+
/// mantissa _with the implicit bit set_!
12+
/// - Figure out if rounding needs to occur by classifying the bits that are to be truncated. Some
13+
/// patterns are used to simplify this. Adjust the mantissa with the result if needed.
14+
/// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros). Subtract one.
15+
/// - Shift the exponent and add the mantissa to create the final representation. Subtracting one
16+
/// from the exponent (above) accounts for the explicit bit being set in the mantissa.
1117
///
12-
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>
18+
/// # Terminology
19+
///
20+
/// - `i`: the original integer
21+
/// - `i_m`: the integer, shifted fully left (no leading zeros)
22+
/// - `n`: number of leading zeroes
23+
/// - `e`: the resulting exponent. Usually 1 is subtracted to offset the mantissa implicit bit.
24+
/// - `m_base`: the mantissa before adjusting for truncated bits. Implicit bit is usually set.
25+
/// - `adj`: the bits that will be truncated, possibly compressed in some way.
26+
/// - `m`: the resulting mantissa. Implicit bit is usually set.
1327
mod int_to_float {
28+
use super::*;
29+
30+
/// Calculate the exponent from the number of leading zeros.
31+
///
32+
/// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit
33+
/// bit set can be added back later.
34+
fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
35+
F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n)
36+
}
37+
38+
/// Adjust a mantissa with dropped bits to perform correct rounding.
39+
///
40+
/// The dropped bits should be exactly the bits that get truncated (left-aligned), but they
41+
/// can be combined or compressed in some way that simplifies operations.
42+
fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
43+
// Branchlessly extract a `1` if rounding up should happen, 0 otherwise
44+
// This accounts for rounding to even.
45+
let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1);
46+
47+
// Add one when we need to round up. Break ties to even.
48+
m_base + adj
49+
}
50+
51+
/// Shift the exponent to its position and add the mantissa.
52+
///
53+
/// If the mantissa has the implicit bit set, the exponent should be one less than its actual
54+
/// value to cancel it out.
55+
fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
56+
// + rather than | so the mantissa can overflow into the exponent
57+
(e << F::SIGNIFICAND_BITS) + m
58+
}
59+
60+
/// Shift distance from a left-aligned integer to a smaller float.
61+
fn shift_f_lt_i<I: Int, F: Float>() -> u32 {
62+
(I::BITS - F::BITS) + F::EXPONENT_BITS
63+
}
64+
65+
/// Shift distance from an integer with `n` leading zeros to a smaller float.
66+
fn shift_f_gt_i<I: Int, F: Float>(n: u32) -> u32 {
67+
F::SIGNIFICAND_BITS - I::BITS + 1 + n
68+
}
69+
70+
/// Perform a signed operation as unsigned, then add the sign back.
71+
pub fn signed<I, F, Conv>(i: I, conv: Conv) -> F
72+
where
73+
F: Float,
74+
I: Int,
75+
F::Int: CastFrom<I>,
76+
Conv: Fn(I::UnsignedInt) -> F::Int,
77+
{
78+
let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1);
79+
F::from_repr(conv(i.unsigned_abs()) | sign_bit)
80+
}
81+
1482
pub fn u32_to_f32_bits(i: u32) -> u32 {
1583
if i == 0 {
1684
return 0;
1785
}
1886
let n = i.leading_zeros();
19-
let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact.
20-
let b = (i << n) << 24; // Insignificant bits, only relevant for rounding.
21-
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
22-
let e = 157 - n; // Exponent plus 127, minus one.
23-
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
87+
// Mantissa with implicit bit set (significant bits)
88+
let m_base = (i << n) >> f32::EXPONENT_BITS;
89+
// Bits that will be dropped (insignificant bits)
90+
let adj = (i << n) << (f32::SIGNIFICAND_BITS + 1);
91+
let m = m_adj::<f32>(m_base, adj);
92+
let e = exp::<u32, f32>(n) - 1;
93+
repr::<f32>(e, m)
2494
}
2595

2696
pub fn u32_to_f64_bits(i: u32) -> u64 {
2797
if i == 0 {
2898
return 0;
2999
}
30100
let n = i.leading_zeros();
31-
let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact.
32-
let e = 1053 - n as u64; // Exponent plus 1023, minus one.
33-
(e << 52) + m // Bit 53 of m will overflow into e.
101+
// Mantissa with implicit bit set
102+
let m = (i as u64) << shift_f_gt_i::<u32, f64>(n);
103+
let e = exp::<u32, f64>(n) - 1;
104+
repr::<f64>(e, m)
34105
}
35106

36107
pub fn u64_to_f32_bits(i: u64) -> u32 {
37108
let n = i.leading_zeros();
38-
let y = i.wrapping_shl(n);
39-
let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact.
40-
let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding.
41-
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
42-
let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero.
43-
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
109+
let i_m = i.wrapping_shl(n);
110+
// Mantissa with implicit bit set
111+
let m_base: u32 = (i_m >> shift_f_lt_i::<u64, f32>()) as u32;
112+
// The entire lower half of `i` will be truncated (masked portion), plus the
113+
// next `EXPONENT_BITS` bits.
114+
let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
115+
let m = m_adj::<f32>(m_base, adj);
116+
let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
117+
repr::<f32>(e, m)
44118
}
45119

46120
pub fn u64_to_f64_bits(i: u64) -> u64 {
47121
if i == 0 {
48122
return 0;
49123
}
50124
let n = i.leading_zeros();
51-
let a = (i << n) >> 11; // Significant bits, with bit 53 still in tact.
52-
let b = (i << n) << 53; // Insignificant bits, only relevant for rounding.
53-
let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
54-
let e = 1085 - n as u64; // Exponent plus 1023, minus one.
55-
(e << 52) + m // + not |, so the mantissa can overflow into the exponent.
125+
// Mantissa with implicit bit set
126+
let m_base = (i << n) >> f64::EXPONENT_BITS;
127+
let adj = (i << n) << (f64::SIGNIFICAND_BITS + 1);
128+
let m = m_adj::<f64>(m_base, adj);
129+
let e = exp::<u64, f64>(n) - 1;
130+
repr::<f64>(e, m)
56131
}
57132

58133
pub fn u128_to_f32_bits(i: u128) -> u32 {
59134
let n = i.leading_zeros();
60-
let y = i.wrapping_shl(n);
61-
let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact.
62-
let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding.
63-
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
64-
let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero.
65-
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
135+
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
136+
let m_base: u32 = (i_m >> shift_f_lt_i::<u128, f32>()) as u32;
137+
138+
// Within the upper `F::BITS`, everything except for the signifcand
139+
// gets truncated
140+
let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast();
141+
142+
// The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
143+
// check if it is nonzero.
144+
let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into();
145+
let adj = d1 | d2;
146+
147+
// Mantissa with implicit bit set
148+
let m = m_adj::<f32>(m_base, adj);
149+
let e = if i == 0 { 0 } else { exp::<u128, f32>(n) - 1 };
150+
repr::<f32>(e, m)
66151
}
67152

68153
pub fn u128_to_f64_bits(i: u128) -> u64 {
69154
let n = i.leading_zeros();
70-
let y = i.wrapping_shl(n);
71-
let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact.
72-
let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding.
73-
let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
74-
let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero.
75-
(e << 52) + m // + not |, so the mantissa can overflow into the exponent.
155+
let i_m = i.wrapping_shl(n);
156+
// Mantissa with implicit bit set
157+
let m_base: u64 = (i_m >> shift_f_lt_i::<u128, f64>()) as u64;
158+
// The entire lower half of `i` will be truncated (masked portion), plus the
159+
// next `EXPONENT_BITS` bits.
160+
let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
161+
let m = m_adj::<f64>(m_base, adj);
162+
let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
163+
repr::<f64>(e, m)
76164
}
77165
}
78166

@@ -113,38 +201,32 @@ intrinsics! {
113201
intrinsics! {
114202
#[arm_aeabi_alias = __aeabi_i2f]
115203
pub extern "C" fn __floatsisf(i: i32) -> f32 {
116-
let sign_bit = ((i >> 31) as u32) << 31;
117-
f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit)
204+
int_to_float::signed(i, int_to_float::u32_to_f32_bits)
118205
}
119206

120207
#[arm_aeabi_alias = __aeabi_i2d]
121208
pub extern "C" fn __floatsidf(i: i32) -> f64 {
122-
let sign_bit = ((i >> 31) as u64) << 63;
123-
f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit)
209+
int_to_float::signed(i, int_to_float::u32_to_f64_bits)
124210
}
125211

126212
#[arm_aeabi_alias = __aeabi_l2f]
127213
pub extern "C" fn __floatdisf(i: i64) -> f32 {
128-
let sign_bit = ((i >> 63) as u32) << 31;
129-
f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit)
214+
int_to_float::signed(i, int_to_float::u64_to_f32_bits)
130215
}
131216

132217
#[arm_aeabi_alias = __aeabi_l2d]
133218
pub extern "C" fn __floatdidf(i: i64) -> f64 {
134-
let sign_bit = ((i >> 63) as u64) << 63;
135-
f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit)
219+
int_to_float::signed(i, int_to_float::u64_to_f64_bits)
136220
}
137221

138222
#[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
139223
pub extern "C" fn __floattisf(i: i128) -> f32 {
140-
let sign_bit = ((i >> 127) as u32) << 31;
141-
f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit)
224+
int_to_float::signed(i, int_to_float::u128_to_f32_bits)
142225
}
143226

144227
#[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
145228
pub extern "C" fn __floattidf(i: i128) -> f64 {
146-
let sign_bit = ((i >> 127) as u64) << 63;
147-
f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit)
229+
int_to_float::signed(i, int_to_float::u128_to_f64_bits)
148230
}
149231
}
150232

src/int/mod.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ pub(crate) trait Int: MinInt
8383

8484
fn unsigned(self) -> Self::UnsignedInt;
8585
fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
86+
fn unsigned_abs(self) -> Self::UnsignedInt;
8687

8788
fn from_bool(b: bool) -> Self;
8889

@@ -178,7 +179,6 @@ macro_rules! int_impl_common {
178179
fn wrapping_mul(self, other: Self) -> Self {
179180
<Self>::wrapping_mul(self, other)
180181
}
181-
182182
fn wrapping_sub(self, other: Self) -> Self {
183183
<Self>::wrapping_sub(self, other)
184184
}
@@ -235,6 +235,10 @@ macro_rules! int_impl {
235235
me
236236
}
237237

238+
fn unsigned_abs(self) -> Self {
239+
self
240+
}
241+
238242
fn abs_diff(self, other: Self) -> Self {
239243
if self < other {
240244
other.wrapping_sub(self)
@@ -268,6 +272,10 @@ macro_rules! int_impl {
268272
me as $ity
269273
}
270274

275+
fn unsigned_abs(self) -> Self::UnsignedInt {
276+
self.unsigned_abs()
277+
}
278+
271279
fn abs_diff(self, other: Self) -> $uty {
272280
self.wrapping_sub(other).wrapping_abs() as $uty
273281
}

testcrate/tests/conv.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use compiler_builtins::float::Float;
88
use rustc_apfloat::{Float as _, FloatConvert as _};
99
use testcrate::*;
1010

11-
mod int_to_float {
11+
mod i_to_f {
1212
use super::*;
1313

1414
macro_rules! i_to_f {

0 commit comments

Comments
 (0)