|
1 |
| -use simd_llvm::simd_shuffle32; |
| 1 | +use simd_llvm::{simd_shuffle8, simd_shuffle32}; |
2 | 2 | use v256::*;
|
3 | 3 | use v128::*;
|
4 | 4 | use x86::__m256i;
|
@@ -184,8 +184,57 @@ pub unsafe fn _mm256_avg_epu8 (a: u8x32, b: u8x32) -> u8x32 {
|
184 | 184 | pavgb(a, b)
|
185 | 185 | }
|
186 | 186 |
|
| 187 | +/// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`. |
| 188 | +#[inline(always)] |
| 189 | +#[target_feature = "+avx2"] |
| 190 | +#[cfg_attr(test, assert_instr(vpblendd, imm8 = 9))] |
| 191 | +pub unsafe fn _mm_blend_epi32(a: i32x8, b: i32x8, imm8: i32) -> i32x8 { |
| 192 | + let imm8 = (imm8 & 0xFF) as u8; |
| 193 | + macro_rules! blend4 { |
| 194 | + ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => { |
| 195 | + simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]); |
| 196 | + } |
| 197 | + } |
| 198 | + macro_rules! blend3 { |
| 199 | + ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => { |
| 200 | + match (imm8 >> 6) & 0b11 { |
| 201 | + 0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7), |
| 202 | + 0b01 => blend4!($a, $b, $c, $d, $e, $f, 14, 7), |
| 203 | + 0b10 => blend4!($a, $b, $c, $d, $e, $f, 6, 15), |
| 204 | + _ => blend4!($a, $b, $c, $d, $e, $f, 14, 15), |
| 205 | + } |
| 206 | + } |
| 207 | + } |
| 208 | + macro_rules! blend2 { |
| 209 | + ($a:expr, $b:expr, $c:expr, $d:expr) => { |
| 210 | + match (imm8 >> 4) & 0b11 { |
| 211 | + 0b00 => blend3!($a, $b, $c, $d, 4, 5), |
| 212 | + 0b01 => blend3!($a, $b, $c, $d, 12, 5), |
| 213 | + 0b10 => blend3!($a, $b, $c, $d, 4, 13), |
| 214 | + _ => blend3!($a, $b, $c, $d, 12, 13), |
| 215 | + } |
| 216 | + } |
| 217 | + } |
| 218 | + macro_rules! blend1 { |
| 219 | + ($a:expr, $b:expr) => { |
| 220 | + match (imm8 >> 2) & 0b11 { |
| 221 | + 0b00 => blend2!($a, $b, 2, 3), |
| 222 | + 0b01 => blend2!($a, $b, 10, 3), |
| 223 | + 0b10 => blend2!($a, $b, 2, 11), |
| 224 | + _ => blend2!($a, $b, 10, 11), |
| 225 | + } |
| 226 | + } |
| 227 | + } |
| 228 | + match imm8 & 0b11 { |
| 229 | + 0b00 => blend1!(0, 1), |
| 230 | + 0b01 => blend1!(8, 1), |
| 231 | + 0b10 => blend1!(0, 9), |
| 232 | + _ => blend1!(8, 9), |
| 233 | + } |
| 234 | +} |
| 235 | + |
| 236 | + |
187 | 237 | // TODO _mm256_blend_epi16
|
188 |
| -// TODO _mm_blend_epi32 |
189 | 238 | // TODO _mm256_blend_epi32
|
190 | 239 |
|
191 | 240 | /// Blend packed 8-bit integers from `a` and `b` using `mask`.
|
@@ -1444,6 +1493,22 @@ mod tests {
|
1444 | 1493 | assert_eq!(r, u16x16::splat(6));
|
1445 | 1494 | }
|
1446 | 1495 |
|
| 1496 | + #[simd_test = "avx2"] |
| 1497 | + unsafe fn _mm_blend_epi32() { |
| 1498 | + let (a, b) = (i32x8::splat(3), i32x8::splat(9)); |
| 1499 | + let e = i32x8::splat(3).replace(0, 9); |
| 1500 | + let r = avx2::_mm_blend_epi32(a, b, 0x01 as i32); |
| 1501 | + assert_eq!(r, e); |
| 1502 | + |
| 1503 | + let e = i32x8::splat(3).replace(1, 9).replace(7, 9); |
| 1504 | + let r = avx2::_mm_blend_epi32(a, b, 0x82 as i32); |
| 1505 | + assert_eq!(r, e); |
| 1506 | + |
| 1507 | + let e = i32x8::splat(9).replace(0, 3).replace(1, 3).replace(7, 3); |
| 1508 | + let r = avx2::_mm_blend_epi32(a, b, 0x7C as i32); |
| 1509 | + assert_eq!(r, e); |
| 1510 | + } |
| 1511 | + |
1447 | 1512 | #[simd_test = "avx2"]
|
1448 | 1513 | unsafe fn _mm256_blendv_epi8() {
|
1449 | 1514 | let (a,b) = (i8x32::splat(4),i8x32::splat(2));
|
|
0 commit comments