@@ -12,6 +12,16 @@ use stdsimd_test::assert_instr;
12
12
13
13
#[ allow( improper_ctypes) ]
14
14
extern "C" {
15
+ #[ link_name = "llvm.x86.sse.cvtpi2ps" ]
16
+ fn cvtpi2ps ( a : f32x4 , b : __m64 ) -> f32x4 ;
17
+ #[ link_name = "llvm.x86.mmx.pextr.w" ]
18
+ fn pextrw ( a : __m64 , imm8 : i32 ) -> i32 ;
19
+ #[ link_name = "llvm.x86.mmx.pinsr.w" ]
20
+ fn pinsrw ( a : __m64 , d : i32 , imm8 : i32 ) -> __m64 ;
21
+ #[ link_name = "llvm.x86.mmx.pmovmskb" ]
22
+ fn pmovmskb ( a : __m64 ) -> i32 ;
23
+ #[ link_name = "llvm.x86.sse.pshuf.w" ]
24
+ fn pshufw ( a : __m64 , imm8 : i8 ) -> __m64 ;
15
25
#[ link_name = "llvm.x86.mmx.pmaxs.w" ]
16
26
fn pmaxsw ( a : __m64 , b : __m64 ) -> __m64 ;
17
27
#[ link_name = "llvm.x86.mmx.pmaxu.b" ]
@@ -98,6 +108,64 @@ pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 {
98
108
_mm_min_pu8 ( a, b)
99
109
}
100
110
111
+ /// Converts two elements of a 64-bit vector of [2 x i32] into two
112
+ /// floating point values and writes them to the lower 64-bits of the
113
+ /// destination. The remaining higher order elements of the destination are
114
+ /// copied from the corresponding elements in the first operand.
115
+ #[ inline( always) ]
116
+ #[ target_feature = "+sse" ]
117
+ #[ cfg_attr( test, assert_instr( cvtpi2ps) ) ]
118
+ pub unsafe fn _mm_cvt_pi2ps ( a : f32x4 , b : i32x2 ) -> f32x4 {
119
+ cvtpi2ps ( a, mem:: transmute ( b) )
120
+ }
121
+
122
+ /// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
123
+ /// returns it, as specified by the immediate integer operand.
124
+ #[ inline( always) ]
125
+ #[ target_feature = "+sse" ]
126
+ #[ cfg_attr( test, assert_instr( pextrw, imm2 = 0 ) ) ]
127
+ pub unsafe fn _mm_extract_pi16 ( a : i16x4 , imm2 : i32 ) -> i16 {
128
+ macro_rules! call {
129
+ ( $imm2: expr) => { pextrw( mem:: transmute( a) , $imm2) as i16 }
130
+ }
131
+ constify_imm2 ! ( imm2, call)
132
+ }
133
+
134
+ /// Copies data from the 64-bit vector of [4 x i16] to the destination,
135
+ /// and inserts the lower 16-bits of an integer operand at the 16-bit offset
136
+ /// specified by the immediate operand `n`.
137
+ #[ inline( always) ]
138
+ #[ target_feature = "+sse" ]
139
+ #[ cfg_attr( test, assert_instr( pinsrw, imm2 = 0 ) ) ]
140
+ pub unsafe fn _mm_insert_pi16 ( a : i16x4 , d : i32 , imm2 : i32 ) -> i16x4 {
141
+ macro_rules! call {
142
+ ( $imm2: expr) => { mem:: transmute( pinsrw( mem:: transmute( a) , d, $imm2) ) }
143
+ }
144
+ constify_imm2 ! ( imm2, call)
145
+ }
146
+
147
+ /// Takes the most significant bit from each 8-bit element in a 64-bit
148
+ /// integer vector to create a 16-bit mask value. Zero-extends the value to
149
+ /// 32-bit integer and writes it to the destination.
150
+ #[ inline( always) ]
151
+ #[ target_feature = "+sse" ]
152
+ #[ cfg_attr( test, assert_instr( pmovmskb) ) ]
153
+ pub unsafe fn _mm_movemask_pi8 ( a : i16x4 ) -> i32 {
154
+ pmovmskb ( mem:: transmute ( a) )
155
+ }
156
+
157
+ /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
158
+ /// destination, as specified by the immediate value operand.
159
+ #[ inline( always) ]
160
+ #[ target_feature = "+sse" ]
161
+ #[ cfg_attr( test, assert_instr( pshufw, imm8 = 0 ) ) ]
162
+ pub unsafe fn _mm_shuffle_pi16 ( a : i16x4 , imm8 : i8 ) -> i16x4 {
163
+ macro_rules! call {
164
+ ( $imm8: expr) => { mem:: transmute( pshufw( mem:: transmute( a) , $imm8) ) }
165
+ }
166
+ constify_imm8 ! ( imm8, call)
167
+ }
168
+
101
169
/// Convert the two lower packed single-precision (32-bit) floating-point
102
170
/// elements in `a` to packed 32-bit integers with truncation.
103
171
#[ inline( always) ]
@@ -205,6 +273,50 @@ mod tests {
205
273
assert_eq ! ( r, sse:: _m_pminub( a, b) ) ;
206
274
}
207
275
276
+ #[ simd_test = "sse" ]
277
+ unsafe fn _mm_cvt_pi2ps ( ) {
278
+ let a = f32x4:: new ( 0. , 0. , 3. , 4. ) ;
279
+ let b = i32x2:: new ( 1 , 2 ) ;
280
+ let expected = f32x4:: new ( 1. , 2. , 3. , 4. ) ;
281
+ let r = sse:: _mm_cvt_pi2ps ( a, b) ;
282
+ assert_eq ! ( r, expected) ;
283
+ }
284
+
285
+ #[ simd_test = "sse" ]
286
+ unsafe fn _mm_extract_pi16 ( ) {
287
+ let a = i16x4:: new ( 1 , 2 , 3 , 4 ) ;
288
+ let r = sse:: _mm_extract_pi16 ( a, 0 ) ;
289
+ assert_eq ! ( r, 1 ) ;
290
+ let r = sse:: _mm_extract_pi16 ( a, 1 ) ;
291
+ assert_eq ! ( r, 2 ) ;
292
+ }
293
+
294
+ #[ simd_test = "sse" ]
295
+ unsafe fn _mm_insert_pi16 ( ) {
296
+ let a = i16x4:: new ( 1 , 2 , 3 , 4 ) ;
297
+ let r = sse:: _mm_insert_pi16 ( a, 0 , 0b0 ) ;
298
+ let expected = i16x4:: new ( 0 , 2 , 3 , 4 ) ;
299
+ assert_eq ! ( r, expected) ;
300
+ let r = sse:: _mm_insert_pi16 ( a, 0 , 0b10 ) ;
301
+ let expected = i16x4:: new ( 1 , 2 , 0 , 4 ) ;
302
+ assert_eq ! ( r, expected) ;
303
+ }
304
+
305
+ #[ simd_test = "sse" ]
306
+ unsafe fn _mm_movemask_pi8 ( ) {
307
+ let a = i16x4:: new ( 0b1000_0000 , 0b0100_0000 , 0b1000_0000 , 0b0100_0000 ) ;
308
+ let r = sse:: _mm_movemask_pi8 ( a) ;
309
+ assert_eq ! ( r, 0b10001 ) ;
310
+ }
311
+
312
+ #[ simd_test = "sse" ]
313
+ unsafe fn _mm_shuffle_pi16 ( ) {
314
+ let a = i16x4:: new ( 1 , 2 , 3 , 4 ) ;
315
+ let r = sse:: _mm_shuffle_pi16 ( a, 0b00_01_01_11 ) ;
316
+ let expected = i16x4:: new ( 4 , 2 , 2 , 1 ) ;
317
+ assert_eq ! ( r, expected) ;
318
+ }
319
+
208
320
#[ simd_test = "sse" ]
209
321
unsafe fn _mm_cvtps_pi32 ( ) {
210
322
let a = f32x4:: new ( 1.0 , 2.0 , 3.0 , 4.0 ) ;
0 commit comments