Skip to content

Commit 6d67a06

Browse files
implement fma
1 parent f9f234f commit 6d67a06

File tree

4 files changed

+204
-3
lines changed

4 files changed

+204
-3
lines changed

src/lib.rs

-2
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,6 @@ pub trait F64Ext: private::Sealed {
366366
#[cfg(todo)]
367367
fn signum(self) -> Self;
368368

369-
#[cfg(todo)]
370369
fn mul_add(self, a: Self, b: Self) -> Self;
371370

372371
#[cfg(todo)]
@@ -485,7 +484,6 @@ impl F64Ext for f64 {
485484
fabs(self)
486485
}
487486

488-
#[cfg(todo)]
489487
#[inline]
490488
fn mul_add(self, a: Self, b: Self) -> Self {
491489
fma(self, a, b)

src/math/fma.rs

+201
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
use core::{f32, f64};
2+
3+
use super::scalbn;
4+
5+
const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
6+
7+
struct Num {
8+
m: u64,
9+
e: i32,
10+
sign: i32,
11+
}
12+
13+
#[inline]
14+
fn normalize(x: f64) -> Num {
15+
let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
16+
17+
let mut ix: u64 = x.to_bits();
18+
let mut e: i32 = (ix >> 52) as i32;
19+
let sign: i32 = e & 0x800;
20+
e &= 0x7ff;
21+
if e == 0 {
22+
ix = (x * x1p63).to_bits();
23+
e = (ix >> 52) as i32 & 0x7ff;
24+
e = if e != 0 { e - 63 } else { 0x800 };
25+
}
26+
ix &= (1 << 52) - 1;
27+
ix |= 1 << 52;
28+
ix <<= 1;
29+
e -= 0x3ff + 52 + 1;
30+
Num { m: ix, e, sign }
31+
}
32+
33+
#[inline]
34+
fn mul(x: u64, y: u64) -> (u64, u64) {
35+
let t1: u64;
36+
let t2: u64;
37+
let t3: u64;
38+
let xlo: u64 = x as u32 as u64;
39+
let xhi: u64 = x >> 32;
40+
let ylo: u64 = y as u32 as u64;
41+
let yhi: u64 = y >> 32;
42+
43+
t1 = xlo * ylo;
44+
t2 = xlo * yhi + xhi * ylo;
45+
t3 = xhi * yhi;
46+
let lo = t1 + (t2 << 32);
47+
let hi = t3 + (t2 >> 32) + (t1 > lo) as u64;
48+
(hi, lo)
49+
}
50+
51+
#[inline]
52+
pub fn fma(x: f64, y: f64, z: f64) -> f64 {
53+
let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
54+
let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
55+
56+
/* normalize so top 10bits and last bit are 0 */
57+
let nx = normalize(x);
58+
let ny = normalize(y);
59+
let nz = normalize(z);
60+
61+
if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
62+
return x * y + z;
63+
}
64+
if nz.e >= ZEROINFNAN {
65+
if nz.e > ZEROINFNAN {
66+
/* z==0 */
67+
return x * y + z;
68+
}
69+
return z;
70+
}
71+
72+
/* mul: r = x*y */
73+
let zhi: u64;
74+
let zlo: u64;
75+
let (mut rhi, mut rlo) = mul(nx.m, ny.m);
76+
/* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
77+
78+
/* align exponents */
79+
let mut e: i32 = nx.e + ny.e;
80+
let mut d: i32 = nz.e - e;
81+
/* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
82+
if d > 0 {
83+
if d < 64 {
84+
zlo = nz.m << d;
85+
zhi = nz.m >> 64 - d;
86+
} else {
87+
zlo = 0;
88+
zhi = nz.m;
89+
e = nz.e - 64;
90+
d -= 64;
91+
if d == 0 {
92+
} else if d < 64 {
93+
rlo = rhi << 64 - d | rlo >> d | ((rlo << 64 - d) != 0) as u64;
94+
rhi = rhi >> d;
95+
} else {
96+
rlo = 1;
97+
rhi = 0;
98+
}
99+
}
100+
} else {
101+
zhi = 0;
102+
d = -d;
103+
if d == 0 {
104+
zlo = nz.m;
105+
} else if d < 64 {
106+
zlo = nz.m >> d | ((nz.m << 64 - d) != 0) as u64;
107+
} else {
108+
zlo = 1;
109+
}
110+
}
111+
112+
/* add */
113+
let mut sign: i32 = nx.sign ^ ny.sign;
114+
let samesign: bool = (sign ^ nz.sign) == 0;
115+
let mut nonzero: i32 = 1;
116+
if samesign {
117+
/* r += z */
118+
rlo += zlo;
119+
rhi += zhi + (rlo < zlo) as u64;
120+
} else {
121+
/* r -= z */
122+
let t = rlo;
123+
rlo -= zlo;
124+
rhi = rhi - zhi - (t < rlo) as u64;
125+
if (rhi >> 63) != 0 {
126+
rlo = (-(rlo as i64)) as u64;
127+
rhi = (-(rhi as i64)) as u64 - (rlo != 0) as u64;
128+
sign = (sign == 0) as i32;
129+
}
130+
nonzero = (rhi != 0) as i32;
131+
}
132+
133+
/* set rhi to top 63bit of the result (last bit is sticky) */
134+
if nonzero != 0 {
135+
e += 64;
136+
d = rhi.leading_zeros() as i32 - 1;
137+
/* note: d > 0 */
138+
rhi = rhi << d | rlo >> 64 - d | ((rlo << d) != 0) as u64;
139+
} else if rlo != 0 {
140+
d = rlo.leading_zeros() as i32 - 1;
141+
if d < 0 {
142+
rhi = rlo >> 1 | (rlo & 1);
143+
} else {
144+
rhi = rlo << d;
145+
}
146+
} else {
147+
/* exact +-0 */
148+
return x * y + z;
149+
}
150+
e -= d;
151+
152+
/* convert to double */
153+
let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */
154+
if sign != 0 {
155+
i = -i;
156+
}
157+
let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */
158+
159+
if e < -1022 - 62 {
160+
/* result is subnormal before rounding */
161+
if e == -1022 - 63 {
162+
let mut c: f64 = x1p63;
163+
if sign != 0 {
164+
c = -c;
165+
}
166+
if r == c {
167+
/* min normal after rounding, underflow depends
168+
on arch behaviour which can be imitated by
169+
a double to float conversion */
170+
let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32;
171+
return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64;
172+
}
173+
/* one bit is lost when scaled, add another top bit to
174+
only round once at conversion if it is inexact */
175+
if (rhi << 53) != 0 {
176+
i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64;
177+
if sign != 0 {
178+
i = -i;
179+
}
180+
r = i as f64;
181+
r = 2. * r - c; /* remove top bit */
182+
183+
/* raise underflow portably, such that it
184+
cannot be optimized away */
185+
{
186+
let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r;
187+
r += (tiny * tiny) * (r - r);
188+
}
189+
}
190+
} else {
191+
/* only round once when scaled */
192+
d = 10;
193+
i = ((rhi >> d | ((rhi << 64 - d) != 0) as u64) << d) as i64;
194+
if sign != 0 {
195+
i = -i;
196+
}
197+
r = i as f64;
198+
}
199+
}
200+
scalbn(r, e)
201+
}

src/math/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ mod fdim;
2222
mod fdimf;
2323
mod floor;
2424
mod floorf;
25+
mod fma;
2526
mod fmod;
2627
mod fmodf;
2728
mod hypot;
@@ -61,6 +62,7 @@ pub use self::fdim::fdim;
6162
pub use self::fdimf::fdimf;
6263
pub use self::floor::floor;
6364
pub use self::floorf::floorf;
65+
pub use self::fma::fma;
6466
pub use self::fmod::fmod;
6567
pub use self::fmodf::fmodf;
6668
pub use self::hypot::hypot;

test-generator/src/main.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -732,7 +732,7 @@ f64f64_f64! {
732732

733733
// With signature `fn(f64, f64, f64) -> f64`
734734
f64f64f64_f64! {
735-
// fma,
735+
fma,
736736
}
737737

738738
// With signature `fn(f64, i32) -> f64`

0 commit comments

Comments
 (0)