Skip to content

Commit 893ba53

Browse files
authored
Merge pull request #1491 from folkertdev/add-llvm-avx2-permd
add `llvm.x86.avx2.permd` intrinsic
2 parents 7b50189 + 4a4535a commit 893ba53

File tree

2 files changed

+26
-0
lines changed

2 files changed

+26
-0
lines changed

example/std_example.rs

+11
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ unsafe fn test_simd() {
244244

245245
test_mm256_shuffle_epi8();
246246
test_mm256_permute2x128_si256();
247+
test_mm256_permutevar8x32_epi32();
247248

248249
#[rustfmt::skip]
249250
let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)));
@@ -447,6 +448,16 @@ unsafe fn test_mm256_permute2x128_si256() {
447448
assert_eq_m256i(r, e);
448449
}
449450

451+
#[cfg(target_arch = "x86_64")]
452+
#[target_feature(enable = "avx2")]
453+
unsafe fn test_mm256_permutevar8x32_epi32() {
454+
let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
455+
let idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
456+
let r = _mm256_setr_epi32(800, 700, 600, 500, 400, 300, 200, 100);
457+
let e = _mm256_permutevar8x32_epi32(a, idx);
458+
assert_eq_m256i(r, e);
459+
}
460+
450461
fn test_checked_mul() {
451462
let u: Option<u8> = u8::from_str_radix("1000", 10).ok();
452463
assert_eq!(u, None);

src/intrinsics/llvm_x86.rs

+15
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,21 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
374374
}
375375
}
376376
}
377+
"llvm.x86.avx2.permd" => {
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32
    //
    // Permutes the eight 32-bit lanes of `a`: result lane `j` is the lane of `a`
    // selected by lane `j` of `idx`.
    intrinsic_args!(fx, args => (a, idx); intrinsic);

    for j in 0..=7 {
        let index = idx.value_typed_lane(fx, fx.tcx.types.u32, j).load_scalar(fx);
        // The instruction only honours the low 3 bits of each selector
        // (`id := idx[i+2:i]*32` in the Intel pseudocode). Mask before using it as a
        // lane index so selectors above 7 wrap around instead of addressing a lane
        // outside the 8-lane source vector.
        let index = fx.bcx.ins().band_imm(index, 0b111);
        let index = fx.bcx.ins().uextend(fx.pointer_type, index);
        let value = a.value_lane_dyn(fx, index).load_scalar(fx);
        ret.place_typed_lane(fx, fx.tcx.types.u32, j).to_ptr().store(
            fx,
            value,
            MemFlags::trusted(),
        );
    }
}
377392
"llvm.x86.avx2.vperm2i128"
378393
| "llvm.x86.avx.vperm2f128.ps.256"
379394
| "llvm.x86.avx.vperm2f128.pd.256" => {

0 commit comments

Comments
 (0)