Skip to content

Commit 48c45c4

Browse files
authored
Use native scalar fma instruction (#1267)
Cranelift 0.87 now supports lowering `fma` as a libcall on x86 [0], and 0.88 enables the native x86 instruction under the `has_fma` flag. aarch64 and s390x already support this as a native instruction, so it's nice that we emit it for those. We can't lower the SIMD version using a vector `fma` instruction, since that lowering can fail if the x86 `has_fma` flag is not enabled and Cranelift doesn't yet know how to fall back in these cases; the SIMD intrinsic therefore emits a scalar `fma` for each lane instead. [0]: bytecodealliance/wasmtime@709716b
1 parent 156bda8 commit 48c45c4

File tree

2 files changed

+12
-12
lines changed

2 files changed

+12
-12
lines changed

src/intrinsics/mod.rs

+6
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,12 @@ fn codegen_float_intrinsic_call<'tcx>(
303303

304304
let layout = fx.layout_of(ty);
305305
let res = match intrinsic {
306+
sym::fmaf32 | sym::fmaf64 => {
307+
let a = args[0].load_scalar(fx);
308+
let b = args[1].load_scalar(fx);
309+
let c = args[2].load_scalar(fx);
310+
CValue::by_val(fx.bcx.ins().fma(a, b, c), layout)
311+
}
306312
sym::copysignf32 | sym::copysignf64 => {
307313
let a = args[0].load_scalar(fx);
308314
let b = args[1].load_scalar(fx);

src/intrinsics/simd.rs

+6-12
Original file line numberDiff line numberDiff line change
@@ -397,21 +397,15 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
397397

398398
let layout = a.layout();
399399
let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
400+
let res_lane_layout = fx.layout_of(lane_ty);
400401

401402
for lane in 0..lane_count {
402-
let a_lane = a.value_lane(fx, lane);
403-
let b_lane = b.value_lane(fx, lane);
404-
let c_lane = c.value_lane(fx, lane);
403+
let a_lane = a.value_lane(fx, lane).load_scalar(fx);
404+
let b_lane = b.value_lane(fx, lane).load_scalar(fx);
405+
let c_lane = c.value_lane(fx, lane).load_scalar(fx);
405406

406-
let res_lane = match lane_ty.kind() {
407-
ty::Float(FloatTy::F32) => {
408-
fx.easy_call("fmaf", &[a_lane, b_lane, c_lane], lane_ty)
409-
}
410-
ty::Float(FloatTy::F64) => {
411-
fx.easy_call("fma", &[a_lane, b_lane, c_lane], lane_ty)
412-
}
413-
_ => unreachable!(),
414-
};
407+
let res_lane = fx.bcx.ins().fma(a_lane, b_lane, c_lane);
408+
let res_lane = CValue::by_val(res_lane, res_lane_layout);
415409

416410
ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
417411
}

0 commit comments

Comments
 (0)