
Commit 3559e70

Merge pull request rust-lang#152 from alexcrichton/wasm-sqrt
Optimize intrinsics on wasm32
2 parents 8e857c7 + ad5641b commit 3559e70

14 files changed: +112 -0 lines

.travis.yml

Lines changed: 7 additions & 0 deletions
@@ -29,6 +29,13 @@ matrix:
   - env: TARGET=cargo-fmt
     rust: beta

+  - env: TARGET=wasm32-unknown-unknown
+    rust: nightly
+    install: rustup target add $TARGET
+    script:
+    - cargo build --target $TARGET
+    - cargo build --no-default-features --target $TARGET
+
 before_install: set -e

 install:

Cargo.toml

Lines changed: 2 additions & 0 deletions
@@ -12,6 +12,8 @@ version = "0.1.2"
 [features]
 # only used to run our test suite
 checked = []
+default = ['stable']
+stable = []

 [workspace]
 members = [
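
Note how the two features compose: `stable` is on by default, so a plain `cargo build` keeps the pure soft-float implementations and the crate continues to compile on stable Rust. The intrinsic fast paths introduced below only compile when default features are disabled on a nightly wasm32 toolchain, which is exactly what the new CI job exercises via `cargo build --no-default-features --target wasm32-unknown-unknown`.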

src/lib.rs

Lines changed: 4 additions & 0 deletions
@@ -11,6 +11,10 @@

 #![deny(warnings)]
 #![no_std]
+#![cfg_attr(
+    all(target_arch = "wasm32", not(feature = "stable")),
+    feature(core_intrinsics)
+)]

 mod math;

src/math/ceil.rs

Lines changed: 8 additions & 0 deletions
@@ -4,6 +4,14 @@ const TOINT: f64 = 1. / f64::EPSILON;

 #[inline]
 pub fn ceil(x: f64) -> f64 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f64.ceil` native instruction, so we can leverage this for both code size
+    // and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::ceilf64(x) }
+        }
+    }
     let u: u64 = x.to_bits();
     let e: i64 = (u >> 52 & 0x7ff) as i64;
     let y: f64;
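
The intrinsic path replaces the bit-twiddling fallback on a single target, so a cross-check against another implementation is cheap insurance. A minimal sketch of such a check (a hypothetical test, not part of this commit; assumes `std` is available for `f64::ceil`):

    #[test]
    fn ceil_agrees_with_std() {
        // Compare bit patterns so a -0.0 vs 0.0 mismatch is caught too.
        for &x in &[-1.5f64, -0.5, -0.0, 0.0, 0.5, 1.5, f64::INFINITY] {
            assert_eq!(libm::ceil(x).to_bits(), x.ceil().to_bits());
        }
    }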

src/math/ceilf.rs

Lines changed: 8 additions & 0 deletions
@@ -2,6 +2,14 @@ use core::f32;

 #[inline]
 pub fn ceilf(x: f32) -> f32 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f32.ceil` native instruction, so we can leverage this for both code size
+    // and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::ceilf32(x) }
+        }
+    }
     let mut ui = x.to_bits();
     let e = (((ui >> 23) & 0xff) - 0x7f) as i32;

src/math/fabs.rs

Lines changed: 8 additions & 0 deletions
@@ -2,5 +2,13 @@ use core::u64;

 #[inline]
 pub fn fabs(x: f64) -> f64 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f64.abs` native instruction, so we can leverage this for both code size
+    // and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::fabsf64(x) }
+        }
+    }
     f64::from_bits(x.to_bits() & (u64::MAX / 2))
 }

src/math/fabsf.rs

Lines changed: 8 additions & 0 deletions
@@ -1,4 +1,12 @@
 #[inline]
 pub fn fabsf(x: f32) -> f32 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f32.abs` native instruction, so we can leverage this for both code size
+    // and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::fabsf32(x) }
+        }
+    }
     f32::from_bits(x.to_bits() & 0x7fffffff)
 }

src/math/floor.rs

Lines changed: 8 additions & 0 deletions
@@ -4,6 +4,14 @@ const TOINT: f64 = 1. / f64::EPSILON;

 #[inline]
 pub fn floor(x: f64) -> f64 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f64.floor` native instruction, so we can leverage this for both code size
+    // and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::floorf64(x) }
+        }
+    }
     let ui = x.to_bits();
     let e = ((ui >> 52) & 0x7ff) as i32;

src/math/floorf.rs

Lines changed: 8 additions & 0 deletions
@@ -2,6 +2,14 @@ use core::f32;

 #[inline]
 pub fn floorf(x: f32) -> f32 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f32.floor` native instruction, so we can leverage this for both code size
+    // and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::floorf32(x) }
+        }
+    }
     let mut ui = x.to_bits();
     let e = (((ui >> 23) & 0xff) - 0x7f) as i32;

src/math/mod.rs

Lines changed: 11 additions & 0 deletions
@@ -58,6 +58,17 @@ macro_rules! i {
     };
 }

+macro_rules! llvm_intrinsically_optimized {
+    (#[cfg($($clause:tt)*)] $e:expr) => {
+        #[cfg(all(not(feature = "stable"), $($clause)*))]
+        {
+            if true { // thwart the dead code lint
+                $e
+            }
+        }
+    };
+}
+
 // Public modules
 mod acos;
 mod acosf;
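
For readers tracing the macro: with the `ceil` invocation shown earlier, the expansion comes out roughly as (hand-expanded, for illustration only):

    #[cfg(all(not(feature = "stable"), target_arch = "wasm32"))]
    {
        if true {
            // thwart the dead code lint
            return unsafe { ::core::intrinsics::ceilf64(x) };
        }
    }

On a non-`stable` wasm32 build the early `return` fires and the soft-float body below it never runs; on every other configuration the whole block is compiled out. The `if true` wrapper keeps the dead-code lint from firing on the statements after the `return`, which matters under the crate's `#![deny(warnings)]`.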

src/math/sqrt.rs

Lines changed: 12 additions & 0 deletions
@@ -82,6 +82,18 @@ const TINY: f64 = 1.0e-300;

 #[inline]
 pub fn sqrt(x: f64) -> f64 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f64.sqrt` native instruction, so we can leverage this for both code size
+    // and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return if x < 0.0 {
+                f64::NAN
+            } else {
+                unsafe { ::core::intrinsics::sqrtf64(x) }
+            }
+        }
+    }
     let mut z: f64;
     let sign: u32 = 0x80000000;
     let mut ix0: i32;
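
Unlike the simpler functions above, `sqrt` guards negative inputs before reaching the intrinsic. The wasm `f64.sqrt` instruction itself yields NaN for negative operands, but LLVM's sqrt intrinsic has historically documented an undefined result for inputs less than -0.0, so the explicit branch presumably keeps the behavior well-defined on every backend. A hypothetical sanity test for the guarded edge case (not part of this commit):

    #[test]
    fn sqrt_of_negative_is_nan() {
        assert!(libm::sqrt(-1.0).is_nan());
        assert!(libm::sqrtf(-1.0).is_nan());
    }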

src/math/sqrtf.rs

Lines changed: 12 additions & 0 deletions
@@ -17,6 +17,18 @@ const TINY: f32 = 1.0e-30;

 #[inline]
 pub fn sqrtf(x: f32) -> f32 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f32.sqrt` native instruction, so we can leverage this for both code size
+    // and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return if x < 0.0 {
+                ::core::f32::NAN
+            } else {
+                unsafe { ::core::intrinsics::sqrtf32(x) }
+            }
+        }
+    }
     let mut z: f32;
     let sign: i32 = 0x80000000u32 as i32;
     let mut ix: i32;

src/math/trunc.rs

Lines changed: 8 additions & 0 deletions
@@ -2,6 +2,14 @@ use core::f64;

 #[inline]
 pub fn trunc(x: f64) -> f64 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f64.trunc` native instruction, so we can leverage this for both code size
+    // and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::truncf64(x) }
+        }
+    }
     let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120

     let mut i: u64 = x.to_bits();

src/math/truncf.rs

Lines changed: 8 additions & 0 deletions
@@ -2,6 +2,14 @@ use core::f32;

 #[inline]
 pub fn truncf(x: f32) -> f32 {
+    // On wasm32 we know that LLVM's intrinsic will compile to an optimized
+    // `f32.trunc` native instruction, so we can leverage this for both code size
+    // and speed.
+    llvm_intrinsically_optimized! {
+        #[cfg(target_arch = "wasm32")] {
+            return unsafe { ::core::intrinsics::truncf32(x) }
+        }
+    }
    let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120

     let mut i: u32 = x.to_bits();
