1
1
use v256:: * ;
2
2
3
- // #[cfg(test)]
4
- // use assert_instr::assert_instr;
3
+ #[ cfg( test) ]
4
+ use assert_instr:: assert_instr;
5
5
6
6
/// Add packed double-precision (64-bit) floating-point elements
7
7
/// in `a` and `b`.
@@ -26,17 +26,11 @@ pub fn _mm256_addsub_pd(a: f64x4, b: f64x4) -> f64x4 {
26
26
unsafe { addsubpd256 ( a, b) }
27
27
}
28
28
29
- #[ allow( improper_ctypes) ]
30
- extern "C" {
31
- #[ link_name = "llvm.x86.avx.addsub.pd.256" ]
32
- fn addsubpd256 ( a : f64x4 , b : f64x4 ) -> f64x4 ;
33
- }
34
-
35
29
/// Subtract packed double-precision (64-bit) floating-point elements in `b`
36
30
/// from packed elements in `a`.
37
31
#[ inline( always) ]
38
32
#[ target_feature = "+avx" ]
39
- // #[cfg_attr(test, assert_instr(subpd ))]
33
+ #[ cfg_attr( test, assert_instr( vsubpd ) ) ]
40
34
pub fn _mm256_sub_pd ( a : f64x4 , b : f64x4 ) -> f64x4 {
41
35
a - b
42
36
}
@@ -45,56 +39,85 @@ pub fn _mm256_sub_pd(a: f64x4, b: f64x4) -> f64x4 {
45
39
/// from packed elements in `a`.
46
40
#[ inline( always) ]
47
41
#[ target_feature = "+avx" ]
48
- // #[cfg_attr(test, assert_instr(subps ))]
42
+ #[ cfg_attr( test, assert_instr( vsubps ) ) ]
49
43
pub fn _mm256_sub_ps ( a : f32x8 , b : f32x8 ) -> f32x8 {
50
44
a - b
51
45
}
52
46
53
47
/// Round packed double-precision (64-bit) floating point elements in `a`
54
48
/// according to the flag `b`. The value of `b` may be as follows:
55
- /// Bits [7:4] are reserved.
56
- /// Bit [3] is a precision exception value:
57
- /// 0: A normal PE exception is used.
58
- /// 1: The PE field is not updated.
59
- /// Bit [2] is the rounding control source:
60
- /// 0: Use bits [1:0] of \a M.
61
- /// 1: Use the current MXCSR setting.
62
- /// Bits [1:0] contain the rounding control definition:
63
- /// 00: Nearest.
64
- /// 01: Downward (toward negative infinity).
65
- /// 10: Upward (toward positive infinity).
66
- /// 11: Truncated.
49
+ /// 0x00: Round to the nearest whole number.
50
+ /// 0x01: Round down, toward negative infinity.
51
+ /// 0x02: Round up, toward positive infinity.
52
+ /// 0x03: Truncate the values.
53
+ /// For additional flag values, see the Clang `avxintrin.h` header:
54
+ /// https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
67
55
#[ inline( always) ]
68
56
#[ target_feature = "+avx" ]
69
57
// #[cfg_attr(test, assert_instr(vroundpd))]
58
+ // TODO: Replace with assert_expanded_instr https://github.com/rust-lang-nursery/stdsimd/issues/49
70
59
pub fn _mm256_round_pd ( a : f64x4 , b : i32 ) -> f64x4 {
71
- unsafe { roundpd256 ( a, b) }
72
- }
73
-
74
- #[ allow( improper_ctypes) ]
75
- extern "C" {
76
- #[ link_name = "llvm.x86.avx.round.pd.256" ]
77
- fn roundpd256 ( a : f64x4 , b : i32 ) -> f64x4 ;
60
+ macro_rules! call {
61
+ ( $imm8: expr) => {
62
+ unsafe { roundpd256( a, $imm8) }
63
+ }
64
+ }
65
+ constify_imm8 ! ( b, call)
78
66
}
79
67
80
68
/// Round packed double-precision (64-bit) floating point elements in `a` toward
81
69
/// positive infinity.
82
70
#[ inline( always) ]
83
71
#[ target_feature = "+avx" ]
84
72
// #[cfg_attr(test, assert_instr(vroundpd))]
73
+ // TODO: Replace with assert_expanded_instr https://github.com/rust-lang-nursery/stdsimd/issues/49
85
74
pub fn _mm256_ceil_pd ( a : f64x4 ) -> f64x4 {
86
- _mm256_round_pd ( a, 0b00000010 )
75
+ unsafe { roundpd256 ( a, 0x02 ) }
87
76
}
88
77
89
78
/// Round packed double-precision (64-bit) floating point elements in `a` toward
90
- /// positive infinity.
79
+ /// negative infinity.
91
80
#[ inline( always) ]
92
81
#[ target_feature = "+avx" ]
93
82
// #[cfg_attr(test, assert_instr(vroundpd))]
83
+ // TODO: Replace with assert_expanded_instr https://github.com/rust-lang-nursery/stdsimd/issues/49
94
84
pub fn _mm256_floor_pd ( a : f64x4 ) -> f64x4 {
95
- _mm256_round_pd ( a, 0b00000001 )
85
+ unsafe { roundpd256 ( a, 0x01 ) }
96
86
}
97
87
88
+ /// LLVM intrinsics used in the above functions
89
+ #[ allow( improper_ctypes) ]
90
+ extern "C" {
91
+ #[ link_name = "llvm.x86.avx.addsub.pd.256" ]
92
+ fn addsubpd256 ( a : f64x4 , b : f64x4 ) -> f64x4 ;
93
+ #[ link_name = "llvm.x86.avx.round.pd.256" ]
94
+ fn roundpd256 ( a : f64x4 , b : i32 ) -> f64x4 ;
95
+ }
96
+
97
+ // Function stubs: work around assert_instr issues in expanded forms
98
+ // ref: https://github.com/rust-lang-nursery/stdsimd/issues/49
99
+ // ref: https://github.com/rust-lang-nursery/stdsimd/issues/47
100
+
101
+ // #[cfg(test)]
102
+ // #[target_feature = "+avx"]
103
+ // #[cfg_attr(test, assert_instr(vroundpd))]
104
+ // pub fn _mm256_round_pd_auto(a: f64x4, b: i32) -> f64x4 {
105
+ // return _mm256_round_pd(a, b);
106
+ // }
107
+
108
+ // #[cfg(test)]
109
+ // #[target_feature = "+avx"]
110
+ // #[cfg_attr(test, assert_instr(vroundpd))]
111
+ // pub fn _mm256_ceil_pd_auto(a: f64x4) -> f64x4 {
112
+ // return _mm256_ceil_pd(a);
113
+ // }
114
+
115
+ // #[cfg(test)]
116
+ // #[target_feature = "+avx"]
117
+ // #[cfg_attr(test, assert_instr(vroundpd))]
118
+ // pub fn _mm256_floor_pd_auto(a: f64x4) -> f64x4 {
119
+ // return _mm256_floor_pd(a);
120
+ // }
98
121
99
122
#[ cfg( all( test, target_feature = "avx" , any( target_arch = "x86" , target_arch = "x86_64" ) ) ) ]
100
123
mod tests {
@@ -183,5 +206,4 @@ mod tests {
183
206
let expected_up = f64x4:: new ( 2.0 , 3.0 , 4.0 , -1.0 ) ;
184
207
assert_eq ! ( result_up, expected_up) ;
185
208
}
186
-
187
209
}
0 commit comments