@@ -16,7 +16,7 @@ use stdsimd_test::assert_instr;
16
16
17
17
/// Constructs a 64-bit integer vector initialized to zero.
18
18
#[ inline( always) ]
19
- #[ target_feature = "+mmx,+sse " ]
19
+ #[ target_feature = "+mmx" ]
20
20
// FIXME: this produces a movl instead of xorps on x86
21
21
// FIXME: this produces an xor instruction instead of xorps on x86_64
22
22
#[ cfg_attr( all( test, target_arch = "x86_64" ) , assert_instr( xor) ) ]
@@ -30,7 +30,7 @@ pub unsafe fn _mm_setzero_si64() -> __m64 {
30
30
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
31
31
/// less than 0x80 are saturated to 0x80.
32
32
#[ inline( always) ]
33
- #[ target_feature = "+mmx,+sse " ]
33
+ #[ target_feature = "+mmx" ]
34
34
#[ cfg_attr( test, assert_instr( packsswb) ) ]
35
35
pub unsafe fn _mm_packs_pi16 ( a : __m64 , b : __m64 ) -> __m64 {
36
36
packsswb ( a, b)
@@ -42,63 +42,93 @@ pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
42
42
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
43
43
/// values less than 0x8000 are saturated to 0x8000.
44
44
#[ inline( always) ]
45
- #[ target_feature = "+mmx,+sse " ]
45
+ #[ target_feature = "+mmx" ]
46
46
#[ cfg_attr( test, assert_instr( packssdw) ) ]
47
47
pub unsafe fn _mm_packs_pi32 ( a : __m64 , b : __m64 ) -> __m64 {
48
48
packssdw ( a, b)
49
49
}
50
50
51
- /// Compares the 8-bit integer elements of two 64-bit integer vectors of
52
- /// [8 x i8] to determine if the element of the first vector is greater than
53
- /// the corresponding element of the second vector.
54
- ///
55
- /// The comparison yields 0 for false, 0xFF for true.
51
+ /// Compares whether each `i8` element of `a` is greater than the corresponding
52
+ /// element of `b`, returning `0` for `false` and `-1` for `true`.
56
53
#[ inline( always) ]
57
54
#[ target_feature = "+mmx" ]
58
55
#[ cfg_attr( test, assert_instr( pcmpgtb) ) ]
59
56
pub unsafe fn _mm_cmpgt_pi8 ( a : __m64 , b : __m64 ) -> __m64 {
60
57
pcmpgtb ( a, b)
61
58
}
62
59
63
- /// Compares the 16-bit integer elements of two 64-bit integer vectors of
64
- /// [4 x i16] to determine if the element of the first vector is greater than
65
- /// the corresponding element of the second vector.
66
- ///
67
- /// The comparison yields 0 for false, 0xFFFF for true.
60
+ /// Compares whether each `i16` element of `a` is greater than the corresponding
61
+ /// element of `b`, returning `0` for `false` and `-1` for `true`.
68
62
#[ inline( always) ]
69
63
#[ target_feature = "+mmx" ]
70
64
#[ cfg_attr( test, assert_instr( pcmpgtw) ) ]
71
65
pub unsafe fn _mm_cmpgt_pi16 ( a : __m64 , b : __m64 ) -> __m64 {
72
66
pcmpgtw ( a, b)
73
67
}
74
68
75
- /// Unpacks the upper 32 bits from two 64-bit integer vectors of
76
- /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
69
+ /// Compares whether each `i32` element of `a` is greater than the corresponding
70
+ /// element of `b`, returning `0` for `false` and `-1` for `true`.
71
+ #[ inline( always) ]
72
+ #[ target_feature = "+mmx" ]
73
+ #[ cfg_attr( test, assert_instr( pcmpgtd) ) ]
74
+ pub unsafe fn _mm_cmpgt_pi32 ( a : __m64 , b : __m64 ) -> __m64 {
75
+ pcmpgtd ( a, b)
76
+ }
77
+
78
+ /// Unpacks the upper two elements from two `i16x4` vectors and interleaves
79
+ /// them into the result: `[a.2, b.2, a.3, b.3]`.
77
80
#[ inline( always) ]
78
81
#[ target_feature = "+mmx" ]
79
82
#[ cfg_attr( test, assert_instr( punpckhwd) ) ] // FIXME punpcklbw expected
80
83
pub unsafe fn _mm_unpackhi_pi16 ( a : __m64 , b : __m64 ) -> __m64 {
81
84
punpckhwd ( a, b)
82
85
}
83
86
84
- /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
85
- /// and interleaves them into a 64-bit integer vector of [8 x i8].
87
+ /// Unpacks the upper four elements from two `i8x8` vectors and interleaves
88
+ /// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
89
+ #[ inline( always) ]
90
+ #[ target_feature = "+mmx" ]
91
+ #[ cfg_attr( test, assert_instr( punpckhbw) ) ]
92
+ pub unsafe fn _mm_unpackhi_pi8 ( a : __m64 , b : __m64 ) -> __m64 {
93
+ punpckhbw ( a, b)
94
+ }
95
+
96
+ /// Unpacks the lower four elements from two `i8x8` vectors and interleaves
97
+ /// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
86
98
#[ inline( always) ]
87
99
#[ target_feature = "+mmx" ]
88
100
#[ cfg_attr( test, assert_instr( punpcklbw) ) ]
89
101
pub unsafe fn _mm_unpacklo_pi8 ( a : __m64 , b : __m64 ) -> __m64 {
90
102
punpcklbw ( a, b)
91
103
}
92
104
93
- /// Unpacks the lower 32 bits from two 64-bit integer vectors of
94
- /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16] .
105
+ /// Unpacks the lower two elements from two `i16x4` vectors and interleaves
106
+ /// them into the result: `[a.0, b.0, a.1, b.1]`.
95
107
#[ inline( always) ]
96
108
#[ target_feature = "+mmx" ]
97
109
#[ cfg_attr( test, assert_instr( punpcklwd) ) ]
98
110
pub unsafe fn _mm_unpacklo_pi16 ( a : __m64 , b : __m64 ) -> __m64 {
99
111
punpcklwd ( a, b)
100
112
}
101
113
114
+ /// Unpacks the upper element from two `i32x2` vectors and interleaves them
115
+ /// into the result: `[a.1, b.1]`.
116
+ #[ inline( always) ]
117
+ #[ target_feature = "+mmx" ]
118
+ #[ cfg_attr( test, assert_instr( punpckhdq) ) ]
119
+ pub unsafe fn _mm_unpackhi_pi32 ( a : __m64 , b : __m64 ) -> __m64 {
120
+ punpckhdq ( a, b)
121
+ }
122
+
123
+ /// Unpacks the lower element from two `i32x2` vectors and interleaves them
124
+ /// into the result: `[a.0, b.0]`.
125
+ #[ inline( always) ]
126
+ #[ target_feature = "+mmx" ]
127
+ #[ cfg_attr( test, assert_instr( punpckldq) ) ]
128
+ pub unsafe fn _mm_unpacklo_pi32 ( a : __m64 , b : __m64 ) -> __m64 {
129
+ punpckldq ( a, b)
130
+ }
131
+
102
132
#[ allow( improper_ctypes) ]
103
133
extern "C" {
104
134
#[ link_name = "llvm.x86.mmx.packsswb" ]
@@ -109,12 +139,20 @@ extern "C" {
109
139
fn pcmpgtb ( a : __m64 , b : __m64 ) -> __m64 ;
110
140
#[ link_name = "llvm.x86.mmx.pcmpgt.w" ]
111
141
fn pcmpgtw ( a : __m64 , b : __m64 ) -> __m64 ;
142
+ #[ link_name = "llvm.x86.mmx.pcmpgt.d" ]
143
+ fn pcmpgtd ( a : __m64 , b : __m64 ) -> __m64 ;
112
144
#[ link_name = "llvm.x86.mmx.punpckhwd" ]
113
145
fn punpckhwd ( a : __m64 , b : __m64 ) -> __m64 ;
114
- #[ link_name = "llvm.x86.mmx.punpcklbw" ]
115
- fn punpcklbw ( a : __m64 , b : __m64 ) -> __m64 ;
116
146
#[ link_name = "llvm.x86.mmx.punpcklwd" ]
117
147
fn punpcklwd ( a : __m64 , b : __m64 ) -> __m64 ;
148
+ #[ link_name = "llvm.x86.mmx.punpckhbw" ]
149
+ fn punpckhbw ( a : __m64 , b : __m64 ) -> __m64 ;
150
+ #[ link_name = "llvm.x86.mmx.punpcklbw" ]
151
+ fn punpcklbw ( a : __m64 , b : __m64 ) -> __m64 ;
152
+ #[ link_name = "llvm.x86.mmx.punpckhdq" ]
153
+ fn punpckhdq ( a : __m64 , b : __m64 ) -> __m64 ;
154
+ #[ link_name = "llvm.x86.mmx.punpckldq" ]
155
+ fn punpckldq ( a : __m64 , b : __m64 ) -> __m64 ;
118
156
}
119
157
120
158
#[ cfg( test) ]
@@ -123,21 +161,21 @@ mod tests {
123
161
use x86:: i686:: mmx;
124
162
use stdsimd_test:: simd_test;
125
163
126
- #[ simd_test = "sse" ] // FIXME: should be mmx
164
+ #[ simd_test = "mmx" ]
127
165
unsafe fn _mm_setzero_si64 ( ) {
128
166
let r: __m64 = :: std:: mem:: transmute ( 0_i64 ) ;
129
167
assert_eq ! ( r, mmx:: _mm_setzero_si64( ) ) ;
130
168
}
131
169
132
- #[ simd_test = "sse" ] // FIXME: should be mmx
170
+ #[ simd_test = "mmx" ]
133
171
unsafe fn _mm_packs_pi16 ( ) {
134
172
let a = i16x4:: new ( -1 , 2 , -3 , 4 ) ;
135
173
let b = i16x4:: new ( -5 , 6 , -7 , 8 ) ;
136
174
let r = i8x8:: new ( -1 , 2 , -3 , 4 , -5 , 6 , -7 , 8 ) ;
137
175
assert_eq ! ( r, i8x8:: from( mmx:: _mm_packs_pi16( a. into( ) , b. into( ) ) ) ) ;
138
176
}
139
177
140
- #[ simd_test = "sse" ] // FIXME: should be mmx
178
+ #[ simd_test = "mmx" ]
141
179
unsafe fn _mm_packs_pi32 ( ) {
142
180
let a = i32x2:: new ( -1 , 2 ) ;
143
181
let b = i32x2:: new ( -5 , 6 ) ;
@@ -162,11 +200,23 @@ mod tests {
162
200
}
163
201
164
202
#[ simd_test = "mmx" ]
165
- unsafe fn _mm_unpackhi_pi16 ( ) {
166
- let a = i16x4:: new ( 0 , 1 , 2 , 3 ) ;
167
- let b = i16x4:: new ( 4 , 5 , 6 , 7 ) ;
168
- let r = i16x4:: new ( 2 , 6 , 3 , 7 ) ;
169
- assert_eq ! ( r, i16x4:: from( mmx:: _mm_unpackhi_pi16( a. into( ) , b. into( ) ) ) ) ;
203
+ unsafe fn _mm_cmpgt_pi32 ( ) {
204
+ let a = i32x2:: new ( 0 , 3 ) ;
205
+ let b = i32x2:: new ( 1 , 2 ) ;
206
+ let r0 = i32x2:: new ( 0 , -1 ) ;
207
+ let r1 = i32x2:: new ( -1 , 0 ) ;
208
+
209
+ assert_eq ! ( r0, mmx:: _mm_cmpgt_pi32( a. into( ) , b. into( ) ) . into( ) ) ;
210
+ assert_eq ! ( r1, mmx:: _mm_cmpgt_pi32( b. into( ) , a. into( ) ) . into( ) ) ;
211
+ }
212
+
213
+ #[ simd_test = "mmx" ]
214
+ unsafe fn _mm_unpackhi_pi8 ( ) {
215
+ let a = i8x8:: new ( 0 , 3 , 4 , 7 , 8 , 11 , 12 , 15 ) ;
216
+ let b = i8x8:: new ( 1 , 2 , 5 , 6 , 9 , 10 , 13 , 14 ) ;
217
+ let r = i8x8:: new ( 8 , 9 , 11 , 10 , 12 , 13 , 15 , 14 ) ;
218
+
219
+ assert_eq ! ( r, mmx:: _mm_unpackhi_pi8( a. into( ) , b. into( ) ) . into( ) ) ;
170
220
}
171
221
172
222
#[ simd_test = "mmx" ]
@@ -177,11 +227,37 @@ mod tests {
177
227
assert_eq ! ( r, i8x8:: from( mmx:: _mm_unpacklo_pi8( a. into( ) , b. into( ) ) ) ) ;
178
228
}
179
229
230
+ #[ simd_test = "mmx" ]
231
+ unsafe fn _mm_unpackhi_pi16 ( ) {
232
+ let a = i16x4:: new ( 0 , 1 , 2 , 3 ) ;
233
+ let b = i16x4:: new ( 4 , 5 , 6 , 7 ) ;
234
+ let r = i16x4:: new ( 2 , 6 , 3 , 7 ) ;
235
+ assert_eq ! ( r, i16x4:: from( mmx:: _mm_unpackhi_pi16( a. into( ) , b. into( ) ) ) ) ;
236
+ }
237
+
180
238
#[ simd_test = "mmx" ]
181
239
unsafe fn _mm_unpacklo_pi16 ( ) {
182
240
let a = i16x4:: new ( 0 , 1 , 2 , 3 ) ;
183
241
let b = i16x4:: new ( 4 , 5 , 6 , 7 ) ;
184
242
let r = i16x4:: new ( 0 , 4 , 1 , 5 ) ;
185
243
assert_eq ! ( r, i16x4:: from( mmx:: _mm_unpacklo_pi16( a. into( ) , b. into( ) ) ) ) ;
186
244
}
245
+
246
+ #[ simd_test = "mmx" ]
247
+ unsafe fn _mm_unpackhi_pi32 ( ) {
248
+ let a = i32x2:: new ( 0 , 3 ) ;
249
+ let b = i32x2:: new ( 1 , 2 ) ;
250
+ let r = i32x2:: new ( 3 , 2 ) ;
251
+
252
+ assert_eq ! ( r, mmx:: _mm_unpackhi_pi32( a. into( ) , b. into( ) ) . into( ) ) ;
253
+ }
254
+
255
+ #[ simd_test = "mmx" ]
256
+ unsafe fn _mm_unpacklo_pi32 ( ) {
257
+ let a = i32x2:: new ( 0 , 3 ) ;
258
+ let b = i32x2:: new ( 1 , 2 ) ;
259
+ let r = i32x2:: new ( 0 , 1 ) ;
260
+
261
+ assert_eq ! ( r, mmx:: _mm_unpacklo_pi32( a. into( ) , b. into( ) ) . into( ) ) ;
262
+ }
187
263
}
0 commit comments