Skip to content

Commit c511676

Browse files
authored
Merge pull request #1495 from folkertdev/add-llvm-sse2-cvtps2dq
add `llvm.x86.sse2.cvtps2dq`
2 parents 632e5df + 63cb28e commit c511676

File tree

2 files changed

+48
-1
lines changed

2 files changed

+48
-1
lines changed

example/std_example.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,9 @@ unsafe fn test_simd() {
251251
test_mm_add_epi8();
252252
test_mm_add_pd();
253253
test_mm_cvtepi8_epi16();
254+
#[cfg(not(jit))]
255+
test_mm_cvtps_epi32();
256+
test_mm_cvttps_epi32();
254257
test_mm_cvtsi128_si64();
255258

256259
test_mm_extract_epi8();
@@ -476,6 +479,41 @@ unsafe fn test_mm256_permutevar8x32_epi32() {
476479
assert_eq_m256i(r, e);
477480
}
478481

482+
/// Checks `_mm_cvtps_epi32` (the `cvtps2dq` instruction) against its hardware semantics.
///
/// The conversion rounds according to the current MXCSR rounding mode (round-to-nearest-even
/// unless it has been changed), so `1.5` becomes `2` and `-2.5` becomes `-2`. Every lane that
/// cannot be represented as an `i32` -- out of range or NaN -- yields the "integer indefinite"
/// value `0x8000_0000`, i.e. `i32::MIN`.
///
/// Only SSE/SSE2 intrinsics are used, so `sse2` (not `avx2`) is the feature to enable.
// NOTE(review): excluded under `cfg(jit)`, presumably because the backend lowers this intrinsic
// through inline asm, which the JIT mode may not support -- confirm.
#[cfg(target_arch = "x86_64")]
#[cfg(not(jit))]
#[target_feature(enable = "sse2")]
unsafe fn test_mm_cvtps_epi32() {
    // `i32::MAX as f32 + 1.0` is 2^31, which is just past the largest representable `i32`.
    let floats: [f32; 4] = [1.5, -2.5, i32::MAX as f32 + 1.0, f32::NAN];

    // Unaligned load: a `[f32; 4]` is only guaranteed to be 4-byte aligned.
    let float_vec = _mm_loadu_ps(floats.as_ptr());
    let int_vec = _mm_cvtps_epi32(float_vec);

    let mut ints: [i32; 4] = [0; 4];
    _mm_storeu_si128(ints.as_mut_ptr().cast::<__m128i>(), int_vec);

    // This is very different from `floats.map(|f| f as i32)`: an `as` cast truncates and
    // saturates, which would give `[1, -2, i32::MAX, 0]`.
    let expected_ints: [i32; 4] = [2, -2, i32::MIN, i32::MIN];

    assert_eq!(ints, expected_ints);
}
499+
500+
/// Checks `_mm_cvttps_epi32` (the `cvttps2dq` instruction) against its hardware semantics.
///
/// The conversion truncates toward zero, so `1.5` becomes `1` and `-2.5` becomes `-2`. Every
/// lane that cannot be represented as an `i32` -- out of range or NaN -- yields the "integer
/// indefinite" value `0x8000_0000`, i.e. `i32::MIN`.
///
/// Only SSE/SSE2 intrinsics are used, so `sse2` (not `avx2`) is the feature to enable.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn test_mm_cvttps_epi32() {
    // `i32::MAX as f32 + 1.0` is 2^31, which is just past the largest representable `i32`.
    let floats: [f32; 4] = [1.5, -2.5, i32::MAX as f32 + 1.0, f32::NAN];

    // Unaligned load: a `[f32; 4]` is only guaranteed to be 4-byte aligned.
    let float_vec = _mm_loadu_ps(floats.as_ptr());
    let int_vec = _mm_cvttps_epi32(float_vec);

    let mut ints: [i32; 4] = [0; 4];
    _mm_storeu_si128(ints.as_mut_ptr().cast::<__m128i>(), int_vec);

    // This is very different from `floats.map(|f| f as i32)`: an `as` cast saturates and maps
    // NaN to zero, which would give `[1, -2, i32::MAX, 0]`.
    let expected_ints: [i32; 4] = [1, -2, i32::MIN, i32::MIN];

    assert_eq!(ints, expected_ints);
}
516+
479517
fn test_checked_mul() {
480518
let u: Option<u8> = u8::from_str_radix("1000", 10).ok();
481519
assert_eq!(u, None);

src/intrinsics/llvm_x86.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,11 +459,20 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
459459
intrinsic_args!(fx, args => (a); intrinsic);
460460
let a = a.load_scalar(fx);
461461

462+
let value = fx.bcx.ins().x86_cvtt2dq(types::I32X4, a);
463+
let cvalue = CValue::by_val(value, ret.layout());
464+
ret.write_cvalue(fx, cvalue);
465+
}
466+
"llvm.x86.sse2.cvtps2dq" => {
467+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32
468+
intrinsic_args!(fx, args => (a); intrinsic);
469+
let a = a.load_scalar(fx);
470+
462471
// Using inline asm instead of fcvt_to_sint_sat as unrepresentable values are turned
463472
// into 0x80000000 for which Cranelift doesn't have a native instruction.
464473
codegen_inline_asm_inner(
465474
fx,
466-
&[InlineAsmTemplatePiece::String(format!("cvttps2dq xmm0, xmm0"))],
475+
&[InlineAsmTemplatePiece::String(format!("cvtps2dq xmm0, xmm0"))],
467476
&[CInlineAsmOperand::InOut {
468477
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
469478
_late: true,

0 commit comments

Comments
 (0)