Skip to content

Commit cd04817

Browse files
jneem authored and alexcrichton committed
avx2: _mm_blend_epi32 (rust-lang#127)
1 parent 141a23d commit cd04817

File tree

1 file changed

+67
-2
lines changed

1 file changed

+67
-2
lines changed

src/x86/avx2.rs

+67-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use simd_llvm::simd_shuffle32;
1+
use simd_llvm::{simd_shuffle8, simd_shuffle32};
22
use v256::*;
33
use v128::*;
44
use x86::__m256i;
@@ -184,8 +184,57 @@ pub unsafe fn _mm256_avg_epu8 (a: u8x32, b: u8x32) -> u8x32 {
184184
pavgb(a, b)
185185
}
186186

187+
/// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`.
188+
#[inline(always)]
189+
#[target_feature = "+avx2"]
190+
#[cfg_attr(test, assert_instr(vpblendd, imm8 = 9))]
191+
pub unsafe fn _mm_blend_epi32(a: i32x8, b: i32x8, imm8: i32) -> i32x8 {
192+
let imm8 = (imm8 & 0xFF) as u8;
193+
macro_rules! blend4 {
194+
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
195+
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]);
196+
}
197+
}
198+
macro_rules! blend3 {
199+
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
200+
match (imm8 >> 6) & 0b11 {
201+
0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7),
202+
0b01 => blend4!($a, $b, $c, $d, $e, $f, 14, 7),
203+
0b10 => blend4!($a, $b, $c, $d, $e, $f, 6, 15),
204+
_ => blend4!($a, $b, $c, $d, $e, $f, 14, 15),
205+
}
206+
}
207+
}
208+
macro_rules! blend2 {
209+
($a:expr, $b:expr, $c:expr, $d:expr) => {
210+
match (imm8 >> 4) & 0b11 {
211+
0b00 => blend3!($a, $b, $c, $d, 4, 5),
212+
0b01 => blend3!($a, $b, $c, $d, 12, 5),
213+
0b10 => blend3!($a, $b, $c, $d, 4, 13),
214+
_ => blend3!($a, $b, $c, $d, 12, 13),
215+
}
216+
}
217+
}
218+
macro_rules! blend1 {
219+
($a:expr, $b:expr) => {
220+
match (imm8 >> 2) & 0b11 {
221+
0b00 => blend2!($a, $b, 2, 3),
222+
0b01 => blend2!($a, $b, 10, 3),
223+
0b10 => blend2!($a, $b, 2, 11),
224+
_ => blend2!($a, $b, 10, 11),
225+
}
226+
}
227+
}
228+
match imm8 & 0b11 {
229+
0b00 => blend1!(0, 1),
230+
0b01 => blend1!(8, 1),
231+
0b10 => blend1!(0, 9),
232+
_ => blend1!(8, 9),
233+
}
234+
}
235+
236+
187237
// TODO _mm256_blend_epi16
188-
// TODO _mm_blend_epi32
189238
// TODO _mm256_blend_epi32
190239

191240
/// Blend packed 8-bit integers from `a` and `b` using `mask`.
@@ -1444,6 +1493,22 @@ mod tests {
14441493
assert_eq!(r, u16x16::splat(6));
14451494
}
14461495

1496+
#[simd_test = "avx2"]
1497+
unsafe fn _mm_blend_epi32() {
1498+
let (a, b) = (i32x8::splat(3), i32x8::splat(9));
1499+
let e = i32x8::splat(3).replace(0, 9);
1500+
let r = avx2::_mm_blend_epi32(a, b, 0x01 as i32);
1501+
assert_eq!(r, e);
1502+
1503+
let e = i32x8::splat(3).replace(1, 9).replace(7, 9);
1504+
let r = avx2::_mm_blend_epi32(a, b, 0x82 as i32);
1505+
assert_eq!(r, e);
1506+
1507+
let e = i32x8::splat(9).replace(0, 3).replace(1, 3).replace(7, 3);
1508+
let r = avx2::_mm_blend_epi32(a, b, 0x7C as i32);
1509+
assert_eq!(r, e);
1510+
}
1511+
14471512
#[simd_test = "avx2"]
14481513
unsafe fn _mm256_blendv_epi8() {
14491514
let (a,b) = (i8x32::splat(4),i8x32::splat(2));

0 commit comments

Comments
 (0)