Skip to content

Commit 22210c2

Browse files
gwenn authored and alexcrichton committed
* sse: _mm_cvt_pi2ps * sse: _mm_extract_pi16 * sse: _mm_insert_pi16 * sse: _mm_movemask_pi8 * sse: _mm_shuffle_pi16 * sse: fix _mm_insert_pi16 and _mm_extract_pi16 * sse: add tests
1 parent 8abb73c commit 22210c2

File tree

1 file changed

+112
-0
lines changed

1 file changed

+112
-0
lines changed

coresimd/src/x86/i686/sse.rs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,16 @@ use stdsimd_test::assert_instr;
1212

1313
#[allow(improper_ctypes)]
1414
extern "C" {
15+
#[link_name = "llvm.x86.sse.cvtpi2ps"]
16+
fn cvtpi2ps(a: f32x4, b: __m64) -> f32x4;
17+
#[link_name = "llvm.x86.mmx.pextr.w"]
18+
fn pextrw(a: __m64, imm8: i32) -> i32;
19+
#[link_name = "llvm.x86.mmx.pinsr.w"]
20+
fn pinsrw(a: __m64, d: i32, imm8: i32) -> __m64;
21+
#[link_name = "llvm.x86.mmx.pmovmskb"]
22+
fn pmovmskb(a: __m64) -> i32;
23+
#[link_name = "llvm.x86.sse.pshuf.w"]
24+
fn pshufw(a: __m64, imm8: i8) -> __m64;
1525
#[link_name = "llvm.x86.mmx.pmaxs.w"]
1626
fn pmaxsw(a: __m64, b: __m64) -> __m64;
1727
#[link_name = "llvm.x86.mmx.pmaxu.b"]
@@ -98,6 +108,64 @@ pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 {
98108
_mm_min_pu8(a, b)
99109
}
100110

111+
/// Converts two elements of a 64-bit vector of [2 x i32] into two
112+
/// floating point values and writes them to the lower 64-bits of the
113+
/// destination. The remaining higher order elements of the destination are
114+
/// copied from the corresponding elements in the first operand.
115+
#[inline(always)]
116+
#[target_feature = "+sse"]
117+
#[cfg_attr(test, assert_instr(cvtpi2ps))]
118+
pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 {
119+
cvtpi2ps(a, mem::transmute(b))
120+
}
121+
122+
/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
123+
/// returns it, as specified by the immediate integer operand.
124+
#[inline(always)]
125+
#[target_feature = "+sse"]
126+
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
127+
pub unsafe fn _mm_extract_pi16(a: i16x4, imm2: i32) -> i16 {
128+
macro_rules! call {
129+
($imm2:expr) => { pextrw(mem::transmute(a), $imm2) as i16 }
130+
}
131+
constify_imm2!(imm2, call)
132+
}
133+
134+
/// Copies data from the 64-bit vector of [4 x i16] to the destination,
135+
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
136+
/// specified by the immediate operand `n`.
137+
#[inline(always)]
138+
#[target_feature = "+sse"]
139+
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
140+
pub unsafe fn _mm_insert_pi16(a: i16x4, d: i32, imm2: i32) -> i16x4 {
141+
macro_rules! call {
142+
($imm2:expr) => { mem::transmute(pinsrw(mem::transmute(a), d, $imm2)) }
143+
}
144+
constify_imm2!(imm2, call)
145+
}
146+
147+
/// Takes the most significant bit from each 8-bit element in a 64-bit
148+
/// integer vector to create a 16-bit mask value. Zero-extends the value to
149+
/// 32-bit integer and writes it to the destination.
150+
#[inline(always)]
151+
#[target_feature = "+sse"]
152+
#[cfg_attr(test, assert_instr(pmovmskb))]
153+
pub unsafe fn _mm_movemask_pi8(a: i16x4) -> i32 {
154+
pmovmskb(mem::transmute(a))
155+
}
156+
157+
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
158+
/// destination, as specified by the immediate value operand.
159+
#[inline(always)]
160+
#[target_feature = "+sse"]
161+
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
162+
pub unsafe fn _mm_shuffle_pi16(a: i16x4, imm8: i8) -> i16x4 {
163+
macro_rules! call {
164+
($imm8:expr) => { mem::transmute(pshufw(mem::transmute(a), $imm8)) }
165+
}
166+
constify_imm8!(imm8, call)
167+
}
168+
101169
/// Convert the two lower packed single-precision (32-bit) floating-point
102170
/// elements in `a` to packed 32-bit integers with truncation.
103171
#[inline(always)]
@@ -205,6 +273,50 @@ mod tests {
205273
assert_eq!(r, sse::_m_pminub(a, b));
206274
}
207275

276+
#[simd_test = "sse"]
277+
unsafe fn _mm_cvt_pi2ps() {
278+
let a = f32x4::new(0., 0., 3., 4.);
279+
let b = i32x2::new(1, 2);
280+
let expected = f32x4::new(1., 2., 3., 4.);
281+
let r = sse::_mm_cvt_pi2ps(a, b);
282+
assert_eq!(r, expected);
283+
}
284+
285+
#[simd_test = "sse"]
286+
unsafe fn _mm_extract_pi16() {
287+
let a = i16x4::new(1, 2, 3, 4);
288+
let r = sse::_mm_extract_pi16(a, 0);
289+
assert_eq!(r, 1);
290+
let r = sse::_mm_extract_pi16(a, 1);
291+
assert_eq!(r, 2);
292+
}
293+
294+
#[simd_test = "sse"]
295+
unsafe fn _mm_insert_pi16() {
296+
let a = i16x4::new(1, 2, 3, 4);
297+
let r = sse::_mm_insert_pi16(a, 0, 0b0);
298+
let expected = i16x4::new(0, 2, 3, 4);
299+
assert_eq!(r, expected);
300+
let r = sse::_mm_insert_pi16(a, 0, 0b10);
301+
let expected = i16x4::new(1, 2, 0, 4);
302+
assert_eq!(r, expected);
303+
}
304+
305+
#[simd_test = "sse"]
306+
unsafe fn _mm_movemask_pi8() {
307+
let a = i16x4::new(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
308+
let r = sse::_mm_movemask_pi8(a);
309+
assert_eq!(r, 0b10001);
310+
}
311+
312+
#[simd_test = "sse"]
313+
unsafe fn _mm_shuffle_pi16() {
314+
let a = i16x4::new(1, 2, 3, 4);
315+
let r = sse::_mm_shuffle_pi16(a, 0b00_01_01_11);
316+
let expected = i16x4::new(4, 2, 2, 1);
317+
assert_eq!(r, expected);
318+
}
319+
208320
#[simd_test = "sse"]
209321
unsafe fn _mm_cvtps_pi32() {
210322
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);

0 commit comments

Comments
 (0)