Skip to content

Commit 68696b4

Browse files
committed
Constify vroundpd, add tests for vsubps and vsubpd
Tests are still up in the air because of rust-lang-nursery/stdsimd#49 (assert_instr does not yet work on expanded forms).
1 parent 9bc2c1e commit 68696b4

File tree

1 file changed

+55
-33
lines changed

1 file changed

+55
-33
lines changed

src/x86/avx.rs

Lines changed: 55 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use v256::*;
22

3-
// #[cfg(test)]
4-
// use assert_instr::assert_instr;
3+
#[cfg(test)]
4+
use assert_instr::assert_instr;
55

66
/// Add packed double-precision (64-bit) floating-point elements
77
/// in `a` and `b`.
@@ -26,17 +26,11 @@ pub fn _mm256_addsub_pd(a: f64x4, b: f64x4) -> f64x4 {
2626
unsafe { addsubpd256(a, b) }
2727
}
2828

29-
#[allow(improper_ctypes)]
30-
extern "C" {
31-
#[link_name = "llvm.x86.avx.addsub.pd.256"]
32-
fn addsubpd256(a: f64x4, b: f64x4) -> f64x4;
33-
}
34-
3529
/// Subtract packed double-precision (64-bit) floating-point elements in `b`
3630
/// from packed elements in `a`.
3731
#[inline(always)]
3832
#[target_feature = "+avx"]
39-
// #[cfg_attr(test, assert_instr(subpd))]
33+
#[cfg_attr(test, assert_instr(vsubpd))]
4034
pub fn _mm256_sub_pd(a: f64x4, b: f64x4) -> f64x4 {
4135
a - b
4236
}
@@ -45,56 +39,85 @@ pub fn _mm256_sub_pd(a: f64x4, b: f64x4) -> f64x4 {
4539
/// from packed elements in `a`.
4640
#[inline(always)]
4741
#[target_feature = "+avx"]
48-
// #[cfg_attr(test, assert_instr(subps))]
42+
#[cfg_attr(test, assert_instr(vsubps))]
4943
pub fn _mm256_sub_ps(a: f32x8, b: f32x8) -> f32x8 {
5044
a - b
5145
}
5246

5347
/// Round packed double-precision (64-bit) floating point elements in `a`
5448
/// according to the flag `b`. The value of `b` may be as follows:
55-
/// Bits [7:4] are reserved.
56-
/// Bit [3] is a precision exception value:
57-
/// 0: A normal PE exception is used.
58-
/// 1: The PE field is not updated.
59-
/// Bit [2] is the rounding control source:
60-
/// 0: Use bits [1:0] of \a M.
61-
/// 1: Use the current MXCSR setting.
62-
/// Bits [1:0] contain the rounding control definition:
63-
/// 00: Nearest.
64-
/// 01: Downward (toward negative infinity).
65-
/// 10: Upward (toward positive infinity).
66-
/// 11: Truncated.
49+
/// 0x00: Round to the nearest whole number.
50+
/// 0x01: Round down, toward negative infinity.
51+
/// 0x02: Round up, toward positive infinity.
52+
/// 0x03: Truncate the values.
53+
/// For additional flag values, see the documentation in Clang's `avxintrin.h` header:
54+
/// https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
6755
#[inline(always)]
6856
#[target_feature = "+avx"]
6957
// #[cfg_attr(test, assert_instr(vroundpd))]
58+
// TODO: Replace with assert_expanded_instr https://github.com/rust-lang-nursery/stdsimd/issues/49
7059
pub fn _mm256_round_pd(a: f64x4, b: i32) -> f64x4 {
71-
unsafe { roundpd256(a, b) }
72-
}
73-
74-
#[allow(improper_ctypes)]
75-
extern "C" {
76-
#[link_name = "llvm.x86.avx.round.pd.256"]
77-
fn roundpd256(a: f64x4, b: i32) -> f64x4;
60+
macro_rules! call {
61+
($imm8:expr) => {
62+
unsafe { roundpd256(a, $imm8) }
63+
}
64+
}
65+
constify_imm8!(b, call)
7866
}
7967

8068
/// Round packed double-precision (64-bit) floating point elements in `a` toward
8169
/// positive infinity.
8270
#[inline(always)]
8371
#[target_feature = "+avx"]
8472
// #[cfg_attr(test, assert_instr(vroundpd))]
73+
// TODO: Replace with assert_expanded_instr https://github.com/rust-lang-nursery/stdsimd/issues/49
8574
pub fn _mm256_ceil_pd(a: f64x4) -> f64x4 {
86-
_mm256_round_pd(a, 0b00000010)
75+
unsafe { roundpd256(a, 0x02) }
8776
}
8877

8978
/// Round packed double-precision (64-bit) floating point elements in `a` toward
90-
/// positive infinity.
79+
/// negative infinity.
9180
#[inline(always)]
9281
#[target_feature = "+avx"]
9382
// #[cfg_attr(test, assert_instr(vroundpd))]
83+
// TODO: Replace with assert_expanded_instr https://github.com/rust-lang-nursery/stdsimd/issues/49
9484
pub fn _mm256_floor_pd(a: f64x4) -> f64x4 {
95-
_mm256_round_pd(a, 0b00000001)
85+
unsafe { roundpd256(a, 0x01) }
9686
}
9787

88+
/// LLVM intrinsics used in the above functions
89+
#[allow(improper_ctypes)]
90+
extern "C" {
91+
#[link_name = "llvm.x86.avx.addsub.pd.256"]
92+
fn addsubpd256(a: f64x4, b: f64x4) -> f64x4;
93+
#[link_name = "llvm.x86.avx.round.pd.256"]
94+
fn roundpd256(a: f64x4, b: i32) -> f64x4;
95+
}
96+
97+
// Function stubs: work around assert_instr issues in expanded forms
98+
// ref: https://github.com/rust-lang-nursery/stdsimd/issues/49
99+
// ref: https://github.com/rust-lang-nursery/stdsimd/issues/47
100+
101+
// #[cfg(test)]
102+
// #[target_feature = "+avx"]
103+
// #[cfg_attr(test, assert_instr(vroundpd))]
104+
// pub fn _mm256_round_pd_auto(a: f64x4, b: i32) -> f64x4 {
105+
// return _mm256_round_pd(a, b);
106+
// }
107+
108+
// #[cfg(test)]
109+
// #[target_feature = "+avx"]
110+
// #[cfg_attr(test, assert_instr(vroundpd))]
111+
// pub fn _mm256_ceil_pd_auto(a: f64x4) -> f64x4 {
112+
// return _mm256_ceil_pd(a);
113+
// }
114+
115+
// #[cfg(test)]
116+
// #[target_feature = "+avx"]
117+
// #[cfg_attr(test, assert_instr(vroundpd))]
118+
// pub fn _mm256_floor_pd_auto(a: f64x4) -> f64x4 {
119+
// return _mm256_floor_pd(a);
120+
// }
98121

99122
#[cfg(all(test, target_feature = "avx", any(target_arch = "x86", target_arch = "x86_64")))]
100123
mod tests {
@@ -183,5 +206,4 @@ mod tests {
183206
let expected_up = f64x4::new(2.0, 3.0, 4.0, -1.0);
184207
assert_eq!(result_up, expected_up);
185208
}
186-
187209
}

0 commit comments

Comments
 (0)