Skip to content

Commit a558968

Browse files
committed
Implement all llvm intrinsics necessary for the image crate
Fixes #1379
1 parent efd3081 commit a558968

File tree

1 file changed

+103
-0
lines changed

1 file changed

+103
-0
lines changed

src/intrinsics/llvm_x86.rs

+103
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,109 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
344344
fx.bcx.ins().sshr(a_lane, saturated_count)
345345
});
346346
}
347+
"llvm.x86.sse2.psad.bw" => {
348+
intrinsic_args!(fx, args => (a, b); intrinsic);
349+
350+
assert_eq!(a.layout(), b.layout());
351+
let layout = a.layout();
352+
353+
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
354+
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
355+
assert_eq!(lane_ty, fx.tcx.types.u8);
356+
assert_eq!(ret_lane_ty, fx.tcx.types.u64);
357+
assert_eq!(lane_count, ret_lane_count * 8);
358+
359+
let ret_lane_layout = fx.layout_of(fx.tcx.types.u64);
360+
for out_lane_idx in 0..lane_count / 8 {
361+
let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0);
362+
363+
for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 {
364+
let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx);
365+
let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx);
366+
367+
let lane_diff = fx.bcx.ins().isub(a_lane, b_lane);
368+
let abs_lane_diff = fx.bcx.ins().iabs(lane_diff);
369+
let abs_lane_diff = fx.bcx.ins().uextend(types::I64, abs_lane_diff);
370+
lane_diff_acc = fx.bcx.ins().iadd(lane_diff_acc, abs_lane_diff);
371+
}
372+
373+
let res_lane = CValue::by_val(lane_diff_acc, ret_lane_layout);
374+
375+
ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
376+
}
377+
}
378+
"llvm.x86.ssse3.pmadd.ub.sw.128" => {
379+
intrinsic_args!(fx, args => (a, b); intrinsic);
380+
381+
let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
382+
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
383+
assert_eq!(lane_ty, fx.tcx.types.u8);
384+
assert_eq!(ret_lane_ty, fx.tcx.types.i16);
385+
assert_eq!(lane_count, ret_lane_count * 2);
386+
387+
let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
388+
for out_lane_idx in 0..lane_count / 2 {
389+
let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
390+
let a_lane0 = fx.bcx.ins().uextend(types::I16, a_lane0);
391+
let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
392+
let b_lane0 = fx.bcx.ins().sextend(types::I16, b_lane0);
393+
394+
let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
395+
let a_lane1 = fx.bcx.ins().uextend(types::I16, a_lane1);
396+
let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
397+
let b_lane1 = fx.bcx.ins().sextend(types::I16, b_lane1);
398+
399+
let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
400+
let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);
401+
402+
let (val, has_overflow) = fx.bcx.ins().sadd_overflow(mul0, mul1);
403+
404+
let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, mul1, 0);
405+
406+
let min = fx.bcx.ins().iconst(types::I16, i64::from(i16::MIN as u16));
407+
let max = fx.bcx.ins().iconst(types::I16, i64::from(i16::MAX as u16));
408+
409+
let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min);
410+
let res_lane = fx.bcx.ins().select(has_overflow, sat_val, val);
411+
412+
let res_lane = CValue::by_val(res_lane, ret_lane_layout);
413+
414+
ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
415+
}
416+
}
417+
"llvm.x86.sse2.pmadd.wd" => {
418+
intrinsic_args!(fx, args => (a, b); intrinsic);
419+
420+
assert_eq!(a.layout(), b.layout());
421+
let layout = a.layout();
422+
423+
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
424+
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
425+
assert_eq!(lane_ty, fx.tcx.types.i16);
426+
assert_eq!(ret_lane_ty, fx.tcx.types.i32);
427+
assert_eq!(lane_count, ret_lane_count * 2);
428+
429+
let ret_lane_layout = fx.layout_of(fx.tcx.types.i32);
430+
for out_lane_idx in 0..lane_count / 2 {
431+
let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
432+
let a_lane0 = fx.bcx.ins().uextend(types::I32, a_lane0);
433+
let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
434+
let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0);
435+
436+
let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
437+
let a_lane1 = fx.bcx.ins().uextend(types::I32, a_lane1);
438+
let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
439+
let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1);
440+
441+
let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
442+
let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);
443+
444+
let res_lane = fx.bcx.ins().iadd(mul0, mul1);
445+
let res_lane = CValue::by_val(res_lane, ret_lane_layout);
446+
447+
ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
448+
}
449+
}
347450
_ => {
348451
fx.tcx
349452
.sess

0 commit comments

Comments
 (0)