Skip to content

Commit e6e2f00

Browse files
committed
Fix incorrect implementation of several vendor intrinsics
1 parent 3b49b9e commit e6e2f00

File tree

1 file changed

+16
-16
lines changed

1 file changed

+16
-16
lines changed

src/intrinsics/llvm_x86.rs

+16 -16
Original file line number | Diff line number | Diff line change
@@ -556,12 +556,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
556556
let ret_lane_layout = fx.layout_of(fx.tcx.types.i32);
557557
for out_lane_idx in 0..lane_count / 2 {
558558
let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
559-
let a_lane0 = fx.bcx.ins().uextend(types::I32, a_lane0);
559+
let a_lane0 = fx.bcx.ins().sextend(types::I32, a_lane0);
560560
let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
561561
let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0);
562562

563563
let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
564-
let a_lane1 = fx.bcx.ins().uextend(types::I32, a_lane1);
564+
let a_lane1 = fx.bcx.ins().sextend(types::I32, a_lane1);
565565
let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
566566
let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1);
567567

@@ -716,14 +716,14 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
716716
assert_eq!(ret_lane_ty, fx.tcx.types.i16);
717717
assert_eq!(lane_count * 2, ret_lane_count);
718718

719-
let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16));
720-
let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16));
719+
let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64);
720+
let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64);
721721
let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
722722

723723
for idx in 0..lane_count {
724724
let lane = a.value_lane(fx, idx).load_scalar(fx);
725725
let sat = fx.bcx.ins().smax(lane, min_i16);
726-
let sat = fx.bcx.ins().umin(sat, max_i16);
726+
let sat = fx.bcx.ins().smin(sat, max_i16);
727727
let res = fx.bcx.ins().ireduce(types::I16, sat);
728728

729729
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -733,7 +733,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
733733
for idx in 0..lane_count {
734734
let lane = b.value_lane(fx, idx).load_scalar(fx);
735735
let sat = fx.bcx.ins().smax(lane, min_i16);
736-
let sat = fx.bcx.ins().umin(sat, max_i16);
736+
let sat = fx.bcx.ins().smin(sat, max_i16);
737737
let res = fx.bcx.ins().ireduce(types::I16, sat);
738738

739739
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -760,8 +760,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
760760

761761
for idx in 0..lane_count {
762762
let lane = a.value_lane(fx, idx).load_scalar(fx);
763-
let sat = fx.bcx.ins().umax(lane, min_u16);
764-
let sat = fx.bcx.ins().umin(sat, max_u16);
763+
let sat = fx.bcx.ins().smax(lane, min_u16);
764+
let sat = fx.bcx.ins().smin(sat, max_u16);
765765
let res = fx.bcx.ins().ireduce(types::I16, sat);
766766

767767
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -770,8 +770,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
770770

771771
for idx in 0..lane_count {
772772
let lane = b.value_lane(fx, idx).load_scalar(fx);
773-
let sat = fx.bcx.ins().umax(lane, min_u16);
774-
let sat = fx.bcx.ins().umin(sat, max_u16);
773+
let sat = fx.bcx.ins().smax(lane, min_u16);
774+
let sat = fx.bcx.ins().smin(sat, max_u16);
775775
let res = fx.bcx.ins().ireduce(types::I16, sat);
776776

777777
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -792,14 +792,14 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
792792
assert_eq!(ret_lane_ty, fx.tcx.types.i16);
793793
assert_eq!(lane_count * 2, ret_lane_count);
794794

795-
let min_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MIN as u16));
796-
let max_i16 = fx.bcx.ins().iconst(types::I32, i64::from(i16::MAX as u16));
795+
let min_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MIN) as u32 as i64);
796+
let max_i16 = fx.bcx.ins().iconst(types::I32, i32::from(i16::MAX) as u32 as i64);
797797
let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
798798

799799
for idx in 0..lane_count / 2 {
800800
let lane = a.value_lane(fx, idx).load_scalar(fx);
801801
let sat = fx.bcx.ins().smax(lane, min_i16);
802-
let sat = fx.bcx.ins().umin(sat, max_i16);
802+
let sat = fx.bcx.ins().smin(sat, max_i16);
803803
let res = fx.bcx.ins().ireduce(types::I16, sat);
804804

805805
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -809,7 +809,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
809809
for idx in 0..lane_count / 2 {
810810
let lane = b.value_lane(fx, idx).load_scalar(fx);
811811
let sat = fx.bcx.ins().smax(lane, min_i16);
812-
let sat = fx.bcx.ins().umin(sat, max_i16);
812+
let sat = fx.bcx.ins().smin(sat, max_i16);
813813
let res = fx.bcx.ins().ireduce(types::I16, sat);
814814

815815
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -819,7 +819,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
819819
for idx in 0..lane_count / 2 {
820820
let lane = a.value_lane(fx, idx).load_scalar(fx);
821821
let sat = fx.bcx.ins().smax(lane, min_i16);
822-
let sat = fx.bcx.ins().umin(sat, max_i16);
822+
let sat = fx.bcx.ins().smin(sat, max_i16);
823823
let res = fx.bcx.ins().ireduce(types::I16, sat);
824824

825825
let res_lane = CValue::by_val(res, ret_lane_layout);
@@ -829,7 +829,7 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
829829
for idx in 0..lane_count / 2 {
830830
let lane = b.value_lane(fx, idx).load_scalar(fx);
831831
let sat = fx.bcx.ins().smax(lane, min_i16);
832-
let sat = fx.bcx.ins().umin(sat, max_i16);
832+
let sat = fx.bcx.ins().smin(sat, max_i16);
833833
let res = fx.bcx.ins().ireduce(types::I16, sat);
834834

835835
let res_lane = CValue::by_val(res, ret_lane_layout);

0 commit comments

Comments (0)