@@ -6,73 +6,161 @@ use super::Float;
/// Conversions from integers to floats.
///
- /// These are hand-optimized bit twiddling code,
- /// which unfortunately isn't the easiest kind of code to read.
+ /// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
+ /// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a
+ ///   mantissa _with the implicit bit set_!
+ /// - Figure out if rounding needs to occur by classifying the bits that are to be truncated. Some
+ ///   patterns are used to simplify this. Adjust the mantissa with the result if needed.
+ /// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros). Subtract one.
+ /// - Shift the exponent and add the mantissa to create the final representation. Subtracting one
+ ///   from the exponent (above) accounts for the explicit bit being set in the mantissa.
///
- /// The algorithm is explained here: <https://blog.m-ou.se/floats/>
+ /// # Terminology
+ ///
+ /// - `i`: the original integer
+ /// - `i_m`: the integer, shifted fully left (no leading zeros)
+ /// - `n`: number of leading zeroes
+ /// - `e`: the resulting exponent. Usually 1 is subtracted to offset the mantissa implicit bit.
+ /// - `m_base`: the mantissa before adjusting for truncated bits. Implicit bit is usually set.
+ /// - `adj`: the bits that will be truncated, possibly compressed in some way.
+ /// - `m`: the resulting mantissa. Implicit bit is usually set.
mod int_to_float {
+     use super::*;
+
+     /// Calculate the exponent from the number of leading zeros.
+     ///
+     /// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit
+     /// bit set can be added back later.
+     fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
+         F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n)
+     }
+
+     /// Adjust a mantissa with dropped bits to perform correct rounding.
+     ///
+     /// The dropped bits should be exactly the bits that get truncated (left-aligned), but they
+     /// can be combined or compressed in some way that simplifies operations.
+     fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
+         // Branchlessly extract a `1` if rounding up should happen, 0 otherwise.
+         // This accounts for rounding to even.
+         let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1);
+
+         // Add one when we need to round up. Break ties to even.
+         m_base + adj
+     }
+
+     /// Shift the exponent to its position and add the mantissa.
+     ///
+     /// If the mantissa has the implicit bit set, the exponent should be one less than its actual
+     /// value to cancel it out.
+     fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
+         // + rather than | so the mantissa can overflow into the exponent
+         (e << F::SIGNIFICAND_BITS) + m
+     }
+
+     /// Shift distance from a left-aligned integer to a smaller float.
+     fn shift_f_lt_i<I: Int, F: Float>() -> u32 {
+         (I::BITS - F::BITS) + F::EXPONENT_BITS
+     }
+
+     /// Shift distance from an integer with `n` leading zeros to a larger float.
+     fn shift_f_gt_i<I: Int, F: Float>(n: u32) -> u32 {
+         F::SIGNIFICAND_BITS - I::BITS + 1 + n
+     }
+
+     /// Perform a signed operation as unsigned, then add the sign back.
+     pub fn signed<I, F, Conv>(i: I, conv: Conv) -> F
+     where
+         F: Float,
+         I: Int,
+         F::Int: CastFrom<I>,
+         Conv: Fn(I::UnsignedInt) -> F::Int,
+     {
+         let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1);
+         F::from_bits(conv(i.unsigned_abs()) | sign_bit)
+     }
+
    pub fn u32_to_f32_bits(i: u32) -> u32 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
-         let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact.
-         let b = (i << n) << 24; // Insignificant bits, only relevant for rounding.
-         let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
-         let e = 157 - n; // Exponent plus 127, minus one.
-         (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
+         // Mantissa with implicit bit set (significant bits)
+         let m_base = (i << n) >> f32::EXPONENT_BITS;
+         // Bits that will be dropped (insignificant bits)
+         let adj = (i << n) << (f32::SIGNIFICAND_BITS + 1);
+         let m = m_adj::<f32>(m_base, adj);
+         let e = exp::<u32, f32>(n) - 1;
+         repr::<f32>(e, m)
    }

    pub fn u32_to_f64_bits(i: u32) -> u64 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
-         let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact.
-         let e = 1053 - n as u64; // Exponent plus 1023, minus one.
-         (e << 52) + m // Bit 53 of m will overflow into e.
+         // Mantissa with implicit bit set
+         let m = (i as u64) << shift_f_gt_i::<u32, f64>(n);
+         let e = exp::<u32, f64>(n) - 1;
+         repr::<f64>(e, m)
    }

    pub fn u64_to_f32_bits(i: u64) -> u32 {
        let n = i.leading_zeros();
-         let y = i.wrapping_shl(n);
-         let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact.
-         let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding.
-         let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
-         let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero.
-         (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
+         let i_m = i.wrapping_shl(n);
+         // Mantissa with implicit bit set
+         let m_base: u32 = (i_m >> shift_f_lt_i::<u64, f32>()) as u32;
+         // The entire lower half of `i` will be truncated (masked portion), plus the
+         // next `EXPONENT_BITS` bits.
+         let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
+         let m = m_adj::<f32>(m_base, adj);
+         let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
+         repr::<f32>(e, m)
    }

    pub fn u64_to_f64_bits(i: u64) -> u64 {
        if i == 0 {
            return 0;
        }
        let n = i.leading_zeros();
-         let a = (i << n) >> 11; // Significant bits, with bit 53 still in tact.
-         let b = (i << n) << 53; // Insignificant bits, only relevant for rounding.
-         let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
-         let e = 1085 - n as u64; // Exponent plus 1023, minus one.
-         (e << 52) + m // + not |, so the mantissa can overflow into the exponent.
+         // Mantissa with implicit bit set
+         let m_base = (i << n) >> f64::EXPONENT_BITS;
+         let adj = (i << n) << (f64::SIGNIFICAND_BITS + 1);
+         let m = m_adj::<f64>(m_base, adj);
+         let e = exp::<u64, f64>(n) - 1;
+         repr::<f64>(e, m)
    }

    pub fn u128_to_f32_bits(i: u128) -> u32 {
        let n = i.leading_zeros();
-         let y = i.wrapping_shl(n);
-         let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact.
-         let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding.
-         let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
-         let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero.
-         (e << 23) + m // + not |, so the mantissa can overflow into the exponent.
+         let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
+         let m_base: u32 = (i_m >> shift_f_lt_i::<u128, f32>()) as u32;
+
+         // Within the upper `F::BITS`, everything except for the significand
+         // gets truncated
+         let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast();
+
+         // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
+         // check if it is nonzero.
+         let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into();
+         let adj = d1 | d2;
+
+         // Mantissa with implicit bit set
+         let m = m_adj::<f32>(m_base, adj);
+         let e = if i == 0 { 0 } else { exp::<u128, f32>(n) - 1 };
+         repr::<f32>(e, m)
    }

    pub fn u128_to_f64_bits(i: u128) -> u64 {
        let n = i.leading_zeros();
-         let y = i.wrapping_shl(n);
-         let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact.
-         let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding.
-         let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
-         let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero.
-         (e << 52) + m // + not |, so the mantissa can overflow into the exponent.
+         let i_m = i.wrapping_shl(n);
+         // Mantissa with implicit bit set
+         let m_base: u64 = (i_m >> shift_f_lt_i::<u128, f64>()) as u64;
+         // The entire lower half of `i` will be truncated (masked portion), plus the
+         // next `EXPONENT_BITS` bits.
+         let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
+         let m = m_adj::<f64>(m_base, adj);
+         let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
+         repr::<f64>(e, m)
    }
}
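
For a concrete picture of the steps listed in the module docs (`m_base`, `adj`, round to nearest even, then pack), here is a self-contained sketch of the `u32 -> f32` path with the generic helpers inlined and `f32`'s constants written out; this is an illustrative rewrite under those assumptions, not code from the patch itself.

```rust
// Standalone sketch of u32 -> f32 conversion, mirroring u32_to_f32_bits above
// with f32's constants spelled out (8 exponent bits, 23 significand bits, bias 127).
fn u32_to_f32_bits_sketch(i: u32) -> u32 {
    if i == 0 {
        return 0;
    }
    let n = i.leading_zeros();
    // m_base: the integer shifted into mantissa position; bit 23 (the implicit bit) is set.
    let m_base = (i << n) >> 8;
    // adj: the bits that fall off the bottom, left-aligned; only used to decide rounding.
    let adj = (i << n) << 24;
    // Round to nearest, ties to even: add 1 exactly when the dropped bits call for it.
    let m = m_base + ((adj - (adj >> 31 & !m_base)) >> 31);
    // e: biased exponent 127 + (31 - n), minus one to cancel the implicit bit in m_base.
    let e = 157 - n;
    // `+` rather than `|`, so a rounded-up mantissa can carry into the exponent.
    (e << 23) + m
}

fn main() {
    for &i in &[1u32, 7, 0x00FF_FFFF, 0x0100_0001, u32::MAX] {
        assert_eq!(f32::from_bits(u32_to_f32_bits_sketch(i)), i as f32);
    }
}
```

The same constants explain the magic shifts in the removed wide-to-narrow paths: `shift_f_lt_i::<u64, f32>() = (64 - 32) + 8 = 40` matches the old `y >> 40`, and `shift_f_gt_i::<u32, f64>(n) = 52 - 32 + 1 + n = 21 + n` matches the old `<< (21 + n)`.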
@@ -113,38 +201,32 @@ intrinsics! {
intrinsics! {
    #[arm_aeabi_alias = __aeabi_i2f]
    pub extern "C" fn __floatsisf(i: i32) -> f32 {
-         let sign_bit = ((i >> 31) as u32) << 31;
-         f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit)
+         int_to_float::signed(i, int_to_float::u32_to_f32_bits)
    }

    #[arm_aeabi_alias = __aeabi_i2d]
    pub extern "C" fn __floatsidf(i: i32) -> f64 {
-         let sign_bit = ((i >> 31) as u64) << 63;
-         f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit)
+         int_to_float::signed(i, int_to_float::u32_to_f64_bits)
    }

    #[arm_aeabi_alias = __aeabi_l2f]
    pub extern "C" fn __floatdisf(i: i64) -> f32 {
-         let sign_bit = ((i >> 63) as u32) << 31;
-         f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit)
+         int_to_float::signed(i, int_to_float::u64_to_f32_bits)
    }

    #[arm_aeabi_alias = __aeabi_l2d]
    pub extern "C" fn __floatdidf(i: i64) -> f64 {
-         let sign_bit = ((i >> 63) as u64) << 63;
-         f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit)
+         int_to_float::signed(i, int_to_float::u64_to_f64_bits)
    }

    #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
    pub extern "C" fn __floattisf(i: i128) -> f32 {
-         let sign_bit = ((i >> 127) as u32) << 31;
-         f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit)
+         int_to_float::signed(i, int_to_float::u128_to_f32_bits)
    }

    #[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
    pub extern "C" fn __floattidf(i: i128) -> f64 {
-         let sign_bit = ((i >> 127) as u64) << 63;
-         f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit)
+         int_to_float::signed(i, int_to_float::u128_to_f64_bits)
    }
}
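
All of these wrappers now funnel through `int_to_float::signed`, which converts the magnitude as an unsigned integer and then ORs the sign back into the top bit. A minimal sketch of that idea for `i32 -> f32`, using the plain `as` cast as a stand-in for the unsigned conversion so the example stays self-contained:

```rust
fn i32_to_f32_sketch(i: i32) -> f32 {
    // Arithmetic shift: all ones for negative input, zero otherwise; keep only the top bit.
    let sign_bit = ((i >> 31) as u32) << 31;
    // Convert the magnitude as unsigned. The patch passes int_to_float::u32_to_f32_bits here;
    // the `as f32` cast is only a stand-in for this sketch.
    let magnitude_bits = (i.unsigned_abs() as f32).to_bits();
    f32::from_bits(magnitude_bits | sign_bit)
}

fn main() {
    for &i in &[0_i32, 1, -1, 42, -42, i32::MIN, i32::MAX] {
        assert_eq!(i32_to_f32_sketch(i), i as f32);
    }
}
```

Using `unsigned_abs` rather than `abs` matters here: `i32::MIN.unsigned_abs()` is well-defined (2^31), whereas `abs` would overflow.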