Skip to content

Commit 705031d

Browse files
committed
Implement _mm_cvttps_epi32
1 parent 3ec8d7a commit 705031d

File tree

1 file changed

+20
-5
lines changed

1 file changed

+20
-5
lines changed

src/intrinsics/llvm_x86.rs

+20-5
Original file line numberDiff line numberDiff line change
@@ -273,16 +273,31 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
273273
);
274274
}
275275
"llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => {
276-
let a = match args {
277-
[a] => a,
278-
_ => bug!("wrong number of args for intrinsic {intrinsic}"),
279-
};
280-
let a = codegen_operand(fx, a);
276+
intrinsic_args!(fx, args => (a); intrinsic);
281277

282278
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
283279
fx.bcx.ins().iabs(lane)
284280
});
285281
}
282+
"llvm.x86.sse2.cvttps2dq" => {
283+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32&ig_expand=2429
284+
intrinsic_args!(fx, args => (a); intrinsic);
285+
let a = a.load_scalar(fx);
286+
287+
// Using inline asm instead of fcvt_to_sint_sat because cvttps2dq turns unrepresentable
288+
// values into 0x80000000, a behavior for which Cranelift doesn't have a native instruction.
289+
codegen_inline_asm_inner(
290+
fx,
291+
&[InlineAsmTemplatePiece::String(format!("cvttps2dq xmm0, xmm0"))],
292+
&[CInlineAsmOperand::InOut {
293+
reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
294+
_late: true,
295+
in_value: a,
296+
out_place: Some(ret),
297+
}],
298+
InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
299+
);
300+
}
286301
"llvm.x86.addcarry.32" | "llvm.x86.addcarry.64" => {
287302
intrinsic_args!(fx, args => (c_in, a, b); intrinsic);
288303
let c_in = c_in.load_scalar(fx);

0 commit comments

Comments
 (0)