@@ -9,70 +9,152 @@ use super::Float;
9
9
/// These are hand-optimized bit twiddling code,
10
10
/// which unfortunately isn't the easiest kind of code to read.
11
11
///
12
- /// The algorithm is explained here: <https://blog.m-ou.se/floats/>
12
+ /// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
13
+ /// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros)
14
+ /// - Calculate a base mantissa by shifting the integer into mantissa position
15
+ /// - Figure out if rounding needs to occour by classifying truncated bits. Some patterns apply
16
+ /// here, so they may be "squashed" into smaller numbers to spmplifiy the classification.
13
17
mod int_to_float {
18
+ use super :: * ;
19
+
20
+ /// Calculate the exponent from the number of leading zeros.
21
+ fn exp < I : Int , F : Float > ( n : u32 ) -> F :: Int
22
+ where
23
+ F :: Int : CastFrom < u32 > ,
24
+ {
25
+ F :: Int :: cast_from ( I :: BITS + F :: EXPONENT_BIAS - 2 - n)
26
+ }
27
+
28
+ /// Shift the integer into the float's mantissa bits. Keep the lowest exponent bit intact.
29
+ fn m_base < I : Int , F : Float > ( i_m : I ) -> F :: Int
30
+ where
31
+ F :: Int : CastFrom < I > ,
32
+ {
33
+ F :: Int :: cast_from ( i_m >> ( ( I :: BITS - F :: BITS ) + F :: EXPONENT_BITS ) )
34
+ }
35
+
36
+ /// Calculate the mantissa in cases where the float size is greater than integer size
37
+ fn m_f_gt_i < I : Int , F : Float > ( i : I , n : u32 ) -> F :: Int
38
+ where
39
+ F :: Int : CastFrom < I > ,
40
+ {
41
+ F :: Int :: cast_from ( i) << ( F :: SIGNIFICAND_BITS - I :: BITS + 1 + n)
42
+ }
43
+
44
+ /// Calculate the mantissa and a dropped bit adjustment when `f` and `i` are equal sizes
45
+ fn m_f_eq_i < I , F : Float < Int = I > > ( i : I , n : u32 ) -> ( F :: Int , F :: Int )
46
+ where
47
+ I : Int + CastInto < F :: Int > ,
48
+ {
49
+ let base = ( i << n) >> F :: EXPONENT_BITS ;
50
+
51
+ // Only the lowest `F::EXPONENT_BITS` bits will be truncated. Shift them
52
+ // to the highest position
53
+ let adj = ( i << n) << ( F :: SIGNIFICAND_BITS + 1 ) ;
54
+
55
+ ( base, adj)
56
+ }
57
+
58
+ /// Adjust a mantissa with dropped bits
59
+ fn m_adj < F : Float > ( m_base : F :: Int , dropped_bits : F :: Int ) -> F :: Int {
60
+ // Branchlessly extract a `1` if rounding up should happen
61
+ let adj = ( dropped_bits - ( dropped_bits >> ( F :: BITS - 1 ) & !m_base) ) >> ( F :: BITS - 1 ) ;
62
+
63
+ // Add one when we need to round up. Break ties to even.
64
+ m_base + adj
65
+ }
66
+
67
+ /// Combine a final float repr from an exponent and mantissa.
68
+ fn repr < F : Float > ( e : F :: Int , m : F :: Int ) -> F :: Int {
69
+ // + rather than | so the mantissa can overflow into the exponent
70
+ ( e << F :: SIGNIFICAND_BITS ) + m
71
+ }
72
+
73
+ /// Perform a signed operation as unsigned, then add the sign back
74
+ pub fn signed < I , F , Conv > ( i : I , conv : Conv ) -> F
75
+ where
76
+ F : Float ,
77
+ I : Int ,
78
+ F :: Int : CastFrom < I > ,
79
+ Conv : Fn ( I :: UnsignedInt ) -> F :: Int ,
80
+ {
81
+ let sign_bit = F :: Int :: cast_from ( i >> ( I :: BITS - 1 ) ) << ( F :: BITS - 1 ) ;
82
+ F :: from_repr ( conv ( i. unsigned_abs ( ) ) | sign_bit)
83
+ }
84
+
14
85
pub fn u32_to_f32_bits ( i : u32 ) -> u32 {
15
86
if i == 0 {
16
87
return 0 ;
17
88
}
18
89
let n = i. leading_zeros ( ) ;
19
- let a = ( i << n) >> 8 ; // Significant bits, with bit 24 still in tact.
20
- let b = ( i << n) << 24 ; // Insignificant bits, only relevant for rounding.
21
- let m = a + ( ( b - ( b >> 31 & !a) ) >> 31 ) ; // Add one when we need to round up. Break ties to even.
22
- let e = 157 - n; // Exponent plus 127, minus one.
23
- ( e << 23 ) + m // + not |, so the mantissa can overflow into the exponent.
90
+ let ( m_base, adj) = m_f_eq_i :: < u32 , f32 > ( i, n) ;
91
+ let m = m_adj :: < f32 > ( m_base, adj) ;
92
+ let e = exp :: < u32 , f32 > ( n) ;
93
+ repr :: < f32 > ( e, m)
24
94
}
25
95
26
96
pub fn u32_to_f64_bits ( i : u32 ) -> u64 {
27
97
if i == 0 {
28
98
return 0 ;
29
99
}
30
100
let n = i. leading_zeros ( ) ;
31
- let m = ( i as u64 ) << ( 21 + n) ; // Significant bits, with bit 53 still in tact.
32
- let e = 1053 - n as u64 ; // Exponent plus 1023, minus one.
33
- ( e << 52 ) + m // Bit 53 of m will overflow into e.
101
+ let m = m_f_gt_i :: < _ , f64 > ( i , n) ;
102
+ let e = exp :: < u32 , f64 > ( n ) ;
103
+ repr :: < f64 > ( e , m )
34
104
}
35
105
36
106
pub fn u64_to_f32_bits ( i : u64 ) -> u32 {
37
107
let n = i. leading_zeros ( ) ;
38
- let y = i. wrapping_shl ( n) ;
39
- let a = ( y >> 40 ) as u32 ; // Significant bits, with bit 24 still in tact.
40
- let b = ( y >> 8 | y & 0xFFFF ) as u32 ; // Insignificant bits, only relevant for rounding.
41
- let m = a + ( ( b - ( b >> 31 & !a) ) >> 31 ) ; // Add one when we need to round up. Break ties to even.
42
- let e = if i == 0 { 0 } else { 189 - n } ; // Exponent plus 127, minus one, except for zero.
43
- ( e << 23 ) + m // + not |, so the mantissa can overflow into the exponent.
108
+ let i_m = i. wrapping_shl ( n) ; // Mantissa, shifted so the first bit is nonzero
109
+ let m_base = m_base :: < _ , f32 > ( i_m) ;
110
+ // The entire lower half of `i` will be truncated (masked portion), plus the
111
+ // next `EXPONENT_BITS` bits.
112
+ let adj = ( i_m >> f32:: EXPONENT_BITS | i_m & 0xFFFF ) as u32 ;
113
+ let m = m_adj :: < f32 > ( m_base, adj) ;
114
+ let e = if i == 0 { 0 } else { exp :: < u64 , f32 > ( n) } ;
115
+ repr :: < f32 > ( e, m)
44
116
}
45
117
46
118
pub fn u64_to_f64_bits ( i : u64 ) -> u64 {
47
119
if i == 0 {
48
120
return 0 ;
49
121
}
50
122
let n = i. leading_zeros ( ) ;
51
- let a = ( i << n) >> 11 ; // Significant bits, with bit 53 still in tact.
52
- let b = ( i << n) << 53 ; // Insignificant bits, only relevant for rounding.
53
- let m = a + ( ( b - ( b >> 63 & !a) ) >> 63 ) ; // Add one when we need to round up. Break ties to even.
54
- let e = 1085 - n as u64 ; // Exponent plus 1023, minus one.
55
- ( e << 52 ) + m // + not |, so the mantissa can overflow into the exponent.
123
+ let ( m_base, adj) = m_f_eq_i :: < u64 , f64 > ( i, n) ;
124
+ let m = m_adj :: < f64 > ( m_base, adj) ;
125
+ let e = exp :: < u64 , f64 > ( n) ;
126
+ repr :: < f64 > ( e, m)
56
127
}
57
128
58
129
pub fn u128_to_f32_bits ( i : u128 ) -> u32 {
59
130
let n = i. leading_zeros ( ) ;
60
- let y = i. wrapping_shl ( n) ;
61
- let a = ( y >> 104 ) as u32 ; // Significant bits, with bit 24 still in tact.
62
- let b = ( y >> 72 ) as u32 | ( ( y << 32 ) >> 32 != 0 ) as u32 ; // Insignificant bits, only relevant for rounding.
63
- let m = a + ( ( b - ( b >> 31 & !a) ) >> 31 ) ; // Add one when we need to round up. Break ties to even.
64
- let e = if i == 0 { 0 } else { 253 - n } ; // Exponent plus 127, minus one, except for zero.
65
- ( e << 23 ) + m // + not |, so the mantissa can overflow into the exponent.
131
+ let i_m = i. wrapping_shl ( n) ; // Mantissa, shifted so the first bit is nonzero
132
+ let m_base = m_base :: < _ , f32 > ( i_m) ;
133
+
134
+ // Within the upper `F::BITS`, everything except for the signifcand
135
+ // gets truncated
136
+ let d1: u32 = ( i_m >> ( u128:: BITS - f32:: BITS - f32:: SIGNIFICAND_BITS - 1 ) ) . cast ( ) ;
137
+
138
+ // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
139
+ // check if it is nonzero.
140
+ let d2: u32 = ( i_m << f32:: BITS >> f32:: BITS != 0 ) . into ( ) ;
141
+ let adj = d1 | d2;
142
+
143
+ let m = m_adj :: < f32 > ( m_base, adj) ;
144
+ let e = if i == 0 { 0 } else { exp :: < u128 , f32 > ( n) } ;
145
+ repr :: < f32 > ( e, m)
66
146
}
67
147
68
148
pub fn u128_to_f64_bits ( i : u128 ) -> u64 {
69
149
let n = i. leading_zeros ( ) ;
70
- let y = i. wrapping_shl ( n) ;
71
- let a = ( y >> 75 ) as u64 ; // Significant bits, with bit 53 still in tact.
72
- let b = ( y >> 11 | y & 0xFFFF_FFFF ) as u64 ; // Insignificant bits, only relevant for rounding.
73
- let m = a + ( ( b - ( b >> 63 & !a) ) >> 63 ) ; // Add one when we need to round up. Break ties to even.
74
- let e = if i == 0 { 0 } else { 1149 - n as u64 } ; // Exponent plus 1023, minus one, except for zero.
75
- ( e << 52 ) + m // + not |, so the mantissa can overflow into the exponent.
150
+ let i_m = i. wrapping_shl ( n) ; // Mantissa, shifted so the first bit is nonzero
151
+ let m_base = m_base :: < _ , f64 > ( i_m) ;
152
+ // The entire lower half of `i` will be truncated (masked portion), plus the
153
+ // next `EXPONENT_BITS` bits.
154
+ let adj = ( i_m >> f64:: EXPONENT_BITS | i_m & 0xFFFF_FFFF ) as u64 ;
155
+ let m = m_adj :: < f64 > ( m_base, adj) ;
156
+ let e = if i == 0 { 0 } else { exp :: < u128 , f64 > ( n) } ;
157
+ repr :: < f64 > ( e, m)
76
158
}
77
159
}
78
160
@@ -113,38 +195,32 @@ intrinsics! {
113
195
intrinsics ! {
114
196
#[ arm_aeabi_alias = __aeabi_i2f]
115
197
pub extern "C" fn __floatsisf( i: i32 ) -> f32 {
116
- let sign_bit = ( ( i >> 31 ) as u32 ) << 31 ;
117
- f32 :: from_bits( int_to_float:: u32_to_f32_bits( i. unsigned_abs( ) ) | sign_bit)
198
+ int_to_float:: signed( i, int_to_float:: u32_to_f32_bits)
118
199
}
119
200
120
201
#[ arm_aeabi_alias = __aeabi_i2d]
121
202
pub extern "C" fn __floatsidf( i: i32 ) -> f64 {
122
- let sign_bit = ( ( i >> 31 ) as u64 ) << 63 ;
123
- f64 :: from_bits( int_to_float:: u32_to_f64_bits( i. unsigned_abs( ) ) | sign_bit)
203
+ int_to_float:: signed( i, int_to_float:: u32_to_f64_bits)
124
204
}
125
205
126
206
#[ arm_aeabi_alias = __aeabi_l2f]
127
207
pub extern "C" fn __floatdisf( i: i64 ) -> f32 {
128
- let sign_bit = ( ( i >> 63 ) as u32 ) << 31 ;
129
- f32 :: from_bits( int_to_float:: u64_to_f32_bits( i. unsigned_abs( ) ) | sign_bit)
208
+ int_to_float:: signed( i, int_to_float:: u64_to_f32_bits)
130
209
}
131
210
132
211
#[ arm_aeabi_alias = __aeabi_l2d]
133
212
pub extern "C" fn __floatdidf( i: i64 ) -> f64 {
134
- let sign_bit = ( ( i >> 63 ) as u64 ) << 63 ;
135
- f64 :: from_bits( int_to_float:: u64_to_f64_bits( i. unsigned_abs( ) ) | sign_bit)
213
+ int_to_float:: signed( i, int_to_float:: u64_to_f64_bits)
136
214
}
137
215
138
216
#[ cfg_attr( target_os = "uefi" , unadjusted_on_win64) ]
139
217
pub extern "C" fn __floattisf( i: i128 ) -> f32 {
140
- let sign_bit = ( ( i >> 127 ) as u32 ) << 31 ;
141
- f32 :: from_bits( int_to_float:: u128_to_f32_bits( i. unsigned_abs( ) ) | sign_bit)
218
+ int_to_float:: signed( i, int_to_float:: u128_to_f32_bits)
142
219
}
143
220
144
221
#[ cfg_attr( target_os = "uefi" , unadjusted_on_win64) ]
145
222
pub extern "C" fn __floattidf( i: i128 ) -> f64 {
146
- let sign_bit = ( ( i >> 127 ) as u64 ) << 63 ;
147
- f64 :: from_bits( int_to_float:: u128_to_f64_bits( i. unsigned_abs( ) ) | sign_bit)
223
+ int_to_float:: signed( i, int_to_float:: u128_to_f64_bits)
148
224
}
149
225
}
150
226
0 commit comments