Commit 65da671

Implement *fmaddsub_p*, *fmsubadd_p* and *fnmadd_p* vendor intrinsics
1 parent 705031d
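As a quick illustration (not part of this commit), the three families lowered here are the LLVM vendor intrinsics reached from user code through core::arch. A minimal sketch, assuming an x86_64 target with the fma feature enabled; the function name fma_demo is only illustrative:

// Illustrative sketch only, not code from the commit: user-level code that
// reaches the "llvm.x86.fma.vfmaddsub.*", "llvm.x86.fma.vfmsubadd.*" and
// "llvm.x86.fma.vfnmadd.*" vendor intrinsics handled by the new match arms.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "fma")]
unsafe fn fma_demo() -> ([f32; 4], [f32; 4], [f32; 4]) {
    use std::arch::x86_64::*;

    let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes 0..4 = [1.0, 2.0, 3.0, 4.0]
    let b = _mm_set_ps(10.0, 10.0, 10.0, 10.0);
    let c = _mm_set_ps(0.5, 0.5, 0.5, 0.5);

    let maddsub = _mm_fmaddsub_ps(a, b, c); // even lanes a*b - c, odd lanes a*b + c
    let msubadd = _mm_fmsubadd_ps(a, b, c); // even lanes a*b + c, odd lanes a*b - c
    let nmadd = _mm_fnmadd_ps(a, b, c);     // every lane -(a*b) + c

    let (mut x, mut y, mut z) = ([0.0f32; 4], [0.0f32; 4], [0.0f32; 4]);
    _mm_storeu_ps(x.as_mut_ptr(), maddsub);
    _mm_storeu_ps(y.as_mut_ptr(), msubadd);
    _mm_storeu_ps(z.as_mut_ptr(), nmadd);
    (x, y, z)
}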

File tree: 1 file changed, +111 -0 lines

src/intrinsics/llvm_x86.rs (+111)
@@ -735,6 +735,117 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             }
         }
 
+        "llvm.x86.fma.vfmaddsub.ps"
+        | "llvm.x86.fma.vfmaddsub.pd"
+        | "llvm.x86.fma.vfmaddsub.ps.256"
+        | "llvm.x86.fma.vfmaddsub.pd.256" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185
+            intrinsic_args!(fx, args => (a, b, c); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            assert_eq!(a.layout(), c.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert!(lane_ty.is_floating_point());
+            assert!(ret_lane_ty.is_floating_point());
+            assert_eq!(lane_count, ret_lane_count);
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
+            for idx in 0..lane_count {
+                let a_lane = a.value_lane(fx, idx).load_scalar(fx);
+                let b_lane = b.value_lane(fx, idx).load_scalar(fx);
+                let c_lane = c.value_lane(fx, idx).load_scalar(fx);
+
+                let mul = fx.bcx.ins().fmul(a_lane, b_lane);
+                let res = if idx & 1 == 0 {
+                    fx.bcx.ins().fsub(mul, c_lane)
+                } else {
+                    fx.bcx.ins().fadd(mul, c_lane)
+                };
+
+                let res_lane = CValue::by_val(res, ret_lane_layout);
+                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
+            }
+        }
+
+        "llvm.x86.fma.vfmsubadd.ps"
+        | "llvm.x86.fma.vfmsubadd.pd"
+        | "llvm.x86.fma.vfmsubadd.ps.256"
+        | "llvm.x86.fma.vfmsubadd.pd.256" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305
+            intrinsic_args!(fx, args => (a, b, c); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            assert_eq!(a.layout(), c.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert!(lane_ty.is_floating_point());
+            assert!(ret_lane_ty.is_floating_point());
+            assert_eq!(lane_count, ret_lane_count);
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
+            for idx in 0..lane_count {
+                let a_lane = a.value_lane(fx, idx).load_scalar(fx);
+                let b_lane = b.value_lane(fx, idx).load_scalar(fx);
+                let c_lane = c.value_lane(fx, idx).load_scalar(fx);
+
+                let mul = fx.bcx.ins().fmul(a_lane, b_lane);
+                let res = if idx & 1 == 0 {
+                    fx.bcx.ins().fadd(mul, c_lane)
+                } else {
+                    fx.bcx.ins().fsub(mul, c_lane)
+                };
+
+                let res_lane = CValue::by_val(res, ret_lane_layout);
+                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
+            }
+        }
+
+        "llvm.x86.fma.vfnmadd.ps"
+        | "llvm.x86.fma.vfnmadd.pd"
+        | "llvm.x86.fma.vfnmadd.ps.256"
+        | "llvm.x86.fma.vfnmadd.pd.256" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371
+            intrinsic_args!(fx, args => (a, b, c); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            assert_eq!(a.layout(), c.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert!(lane_ty.is_floating_point());
+            assert!(ret_lane_ty.is_floating_point());
+            assert_eq!(lane_count, ret_lane_count);
+            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+
+            for idx in 0..lane_count {
+                let a_lane = a.value_lane(fx, idx).load_scalar(fx);
+                let b_lane = b.value_lane(fx, idx).load_scalar(fx);
+                let c_lane = c.value_lane(fx, idx).load_scalar(fx);
+
+                let mul = fx.bcx.ins().fmul(a_lane, b_lane);
+                let neg_mul = fx.bcx.ins().fneg(mul);
+                let res = fx.bcx.ins().fadd(neg_mul, c_lane);
+
+                let res_lane = CValue::by_val(res, ret_lane_layout);
+                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
+            }
+        }
+
         "llvm.x86.sse42.pcmpestri128" => {
             // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri&ig_expand=939
             intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic);
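For reference, the per-lane arithmetic the three new arms emit can be modelled in scalar Rust roughly as follows; this is a reading aid for the diff, not code from the repository:

// Scalar model of the lane-wise lowering above (illustrative only).
fn fmaddsub_lane(idx: usize, a: f32, b: f32, c: f32) -> f32 {
    // Even lanes subtract c from the product, odd lanes add it.
    if idx & 1 == 0 { a * b - c } else { a * b + c }
}

fn fmsubadd_lane(idx: usize, a: f32, b: f32, c: f32) -> f32 {
    // Mirror image of fmaddsub: even lanes add, odd lanes subtract.
    if idx & 1 == 0 { a * b + c } else { a * b - c }
}

fn fnmadd_lane(a: f32, b: f32, c: f32) -> f32 {
    // Every lane: negate the product, then add c.
    -(a * b) + c
}

Note that the lowering uses two separate Cranelift instructions (fmul followed by fadd/fsub, plus fneg for the fnmadd family) rather than a fused multiply-add, so the intermediate product is rounded, matching the scalar model above.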

0 commit comments