Skip to content

Commit f7e4646

Browse files
committed
Add f16/f128 intrinsic support
1 parent e46186f commit f7e4646

File tree

3 files changed

+135
-1
lines changed

3 files changed

+135
-1
lines changed

src/codegen_f16_f128.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,41 @@ pub(crate) fn neg_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value {
119119
fx.bcx.ins().bitcast(types::F128, MemFlags::new(), bits)
120120
}
121121

122+
/// Emits the absolute value of an `f16` using integer bit operations.
///
/// The input is reinterpreted as an `I16`, ANDed with `0x7fff` (every bit
/// except the topmost one) and reinterpreted back as an `F16`.
pub(crate) fn abs_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value {
    // Keeps the low 15 bits and clears bit 15.
    const MAGNITUDE_MASK: i64 = 0x7fff;

    let raw = fx.bcx.ins().bitcast(types::I16, MemFlags::new(), value);
    let magnitude = fx.bcx.ins().band_imm(raw, MAGNITUDE_MASK);
    fx.bcx.ins().bitcast(types::F16, MemFlags::new(), magnitude)
}
127+
128+
/// Emits the absolute value of an `f128` using integer bit operations.
///
/// The input is reinterpreted as an `I128` and split into two halves. Only
/// the high half is masked (its topmost bit is cleared); the low half is
/// passed through untouched before both are rejoined and reinterpreted as
/// an `F128`.
pub(crate) fn abs_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value {
    // `i64::MAX` is 0x7fff_ffff_ffff_ffff: every bit set except the top one.
    const MAGNITUDE_MASK_HIGH: i64 = i64::MAX;

    let raw = fx.bcx.ins().bitcast(types::I128, MemFlags::new(), value);
    let (lo_half, hi_half) = fx.bcx.ins().isplit(raw);
    let hi_half = fx.bcx.ins().band_imm(hi_half, MAGNITUDE_MASK_HIGH);
    let merged = fx.bcx.ins().iconcat(lo_half, hi_half);
    fx.bcx.ins().bitcast(types::F128, MemFlags::new(), merged)
}
135+
136+
/// Emits an `f16` copysign using integer bit operations.
///
/// Both operands are reinterpreted as `I16`. The result combines the low
/// 15 bits of `lhs` (mask `0x7fff`) with the topmost bit of `rhs`
/// (mask `0x8000`), and is reinterpreted back as an `F16`.
pub(crate) fn copysign_f16(fx: &mut FunctionCx<'_, '_, '_>, lhs: Value, rhs: Value) -> Value {
    const MAGNITUDE_MASK: i64 = 0x7fff;
    const SIGN_MASK: i64 = 0x8000;

    let lhs_bits = fx.bcx.ins().bitcast(types::I16, MemFlags::new(), lhs);
    let rhs_bits = fx.bcx.ins().bitcast(types::I16, MemFlags::new(), rhs);
    let magnitude = fx.bcx.ins().band_imm(lhs_bits, MAGNITUDE_MASK);
    let sign_bit = fx.bcx.ins().band_imm(rhs_bits, SIGN_MASK);
    let combined = fx.bcx.ins().bor(magnitude, sign_bit);
    fx.bcx.ins().bitcast(types::F16, MemFlags::new(), combined)
}
144+
145+
/// Emits an `f128` copysign using integer bit operations.
///
/// Both operands are reinterpreted as `I128` and split into halves. The low
/// half of `lhs` is kept as-is; the low half of `rhs` is discarded. The new
/// high half is the high half of `lhs` with its topmost bit cleared, ORed
/// with the topmost bit of the high half of `rhs`.
pub(crate) fn copysign_f128(fx: &mut FunctionCx<'_, '_, '_>, lhs: Value, rhs: Value) -> Value {
    // `i64::MAX` is 0x7fff_ffff_ffff_ffff; `i64::MIN` is 0x8000_0000_0000_0000.
    const MAGNITUDE_MASK_HIGH: i64 = i64::MAX;
    const SIGN_MASK_HIGH: i64 = i64::MIN;

    let lhs_bits = fx.bcx.ins().bitcast(types::I128, MemFlags::new(), lhs);
    let rhs_bits = fx.bcx.ins().bitcast(types::I128, MemFlags::new(), rhs);
    let (lhs_lo, lhs_hi) = fx.bcx.ins().isplit(lhs_bits);
    let (_, rhs_hi) = fx.bcx.ins().isplit(rhs_bits);
    let magnitude_hi = fx.bcx.ins().band_imm(lhs_hi, MAGNITUDE_MASK_HIGH);
    let sign_bit = fx.bcx.ins().band_imm(rhs_hi, SIGN_MASK_HIGH);
    let combined_hi = fx.bcx.ins().bor(magnitude_hi, sign_bit);
    let combined = fx.bcx.ins().iconcat(lhs_lo, combined_hi);
    fx.bcx.ins().bitcast(types::F128, MemFlags::new(), combined)
}
156+
122157
pub(crate) fn codegen_cast(
123158
fx: &mut FunctionCx<'_, '_, '_>,
124159
from: Value,
@@ -222,6 +257,14 @@ pub(crate) fn codegen_cast(
222257
}
223258
}
224259

260+
pub(crate) fn fma_f16(fx: &mut FunctionCx<'_, '_, '_>, x: Value, y: Value, z: Value) -> Value {
261+
let x = f16_to_f64(fx, x);
262+
let y = f16_to_f64(fx, y);
263+
let z = f16_to_f64(fx, z);
264+
let res = fx.bcx.ins().fma(x, y, z);
265+
f64_to_f16(fx, res)
266+
}
267+
225268
pub(crate) fn fmin_f128(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value {
226269
fx.lib_call(
227270
"fminimumf128",

src/compiler_builtins.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ builtin_functions! {
109109
// float intrinsics
110110
fn __powisf2(a: f32, b: i32) -> f32;
111111
fn __powidf2(a: f64, b: i32) -> f64;
112+
fn __powitf2(a: f128, b: i32) -> f128;
112113
fn powf(a: f32, b: f32) -> f32;
113114
fn pow(a: f64, b: f64) -> f64;
114115
fn expf(f: f32) -> f32;
@@ -125,6 +126,19 @@ builtin_functions! {
125126
fn sin(f: f64) -> f64;
126127
fn cosf(f: f32) -> f32;
127128
fn cos(f: f64) -> f64;
129+
fn fmaf128(a: f128, b: f128, c: f128) -> f128;
130+
fn floorf16(f: f16) -> f16;
131+
fn floorf128(f: f128) -> f128;
132+
fn ceilf16(f: f16) -> f16;
133+
fn ceilf128(f: f128) -> f128;
134+
fn truncf16(f: f16) -> f16;
135+
fn truncf128(f: f128) -> f128;
136+
fn rintf16(f: f16) -> f16;
137+
fn rintf128(f: f128) -> f128;
138+
fn sqrtf16(f: f16) -> f16;
139+
fn sqrtf128(f: f128) -> f128;
140+
// FIXME(f16_f128): Add other float intrinsics as compiler-builtins gains support (meaning they
141+
// are available on all targets).
128142

129143
// allocator
130144
// NOTE: These need to be mentioned here despite not being part of compiler_builtins because

src/intrinsics/mod.rs

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,10 @@ fn bool_to_zero_or_max_uint<'tcx>(
249249
let ty = fx.clif_type(ty).unwrap();
250250

251251
let int_ty = match ty {
252+
types::F16 => types::I16,
252253
types::F32 => types::I32,
253254
types::F64 => types::I64,
255+
types::F128 => types::I128,
254256
ty => ty,
255257
};
256258

@@ -309,45 +311,83 @@ fn codegen_float_intrinsic_call<'tcx>(
309311
ret: CPlace<'tcx>,
310312
) -> bool {
311313
let (name, arg_count, ty, clif_ty) = match intrinsic {
314+
sym::expf16 => ("expf16", 1, fx.tcx.types.f16, types::F16),
312315
sym::expf32 => ("expf", 1, fx.tcx.types.f32, types::F32),
313316
sym::expf64 => ("exp", 1, fx.tcx.types.f64, types::F64),
317+
sym::expf128 => ("expf128", 1, fx.tcx.types.f128, types::F128),
318+
sym::exp2f16 => ("exp2f16", 1, fx.tcx.types.f16, types::F16),
314319
sym::exp2f32 => ("exp2f", 1, fx.tcx.types.f32, types::F32),
315320
sym::exp2f64 => ("exp2", 1, fx.tcx.types.f64, types::F64),
321+
sym::exp2f128 => ("exp2f128", 1, fx.tcx.types.f128, types::F128),
322+
sym::sqrtf16 => ("sqrtf16", 1, fx.tcx.types.f16, types::F16),
316323
sym::sqrtf32 => ("sqrtf", 1, fx.tcx.types.f32, types::F32),
317324
sym::sqrtf64 => ("sqrt", 1, fx.tcx.types.f64, types::F64),
325+
sym::sqrtf128 => ("sqrtf128", 1, fx.tcx.types.f128, types::F128),
326+
sym::powif16 => ("__powisf2", 2, fx.tcx.types.f16, types::F16), // compiler-builtins
318327
sym::powif32 => ("__powisf2", 2, fx.tcx.types.f32, types::F32), // compiler-builtins
319328
sym::powif64 => ("__powidf2", 2, fx.tcx.types.f64, types::F64), // compiler-builtins
329+
sym::powif128 => ("__powitf2", 2, fx.tcx.types.f128, types::F128), // compiler-builtins
330+
sym::powf16 => ("powf16", 2, fx.tcx.types.f16, types::F16),
320331
sym::powf32 => ("powf", 2, fx.tcx.types.f32, types::F32),
321332
sym::powf64 => ("pow", 2, fx.tcx.types.f64, types::F64),
333+
sym::powf128 => ("powf128", 2, fx.tcx.types.f128, types::F128),
334+
sym::logf16 => ("logf16", 1, fx.tcx.types.f16, types::F16),
322335
sym::logf32 => ("logf", 1, fx.tcx.types.f32, types::F32),
323336
sym::logf64 => ("log", 1, fx.tcx.types.f64, types::F64),
337+
sym::logf128 => ("logf128", 1, fx.tcx.types.f128, types::F128),
338+
sym::log2f16 => ("log2f16", 1, fx.tcx.types.f16, types::F16),
324339
sym::log2f32 => ("log2f", 1, fx.tcx.types.f32, types::F32),
325340
sym::log2f64 => ("log2", 1, fx.tcx.types.f64, types::F64),
341+
sym::log2f128 => ("log2f128", 1, fx.tcx.types.f128, types::F128),
342+
sym::log10f16 => ("log10f16", 1, fx.tcx.types.f16, types::F16),
326343
sym::log10f32 => ("log10f", 1, fx.tcx.types.f32, types::F32),
327344
sym::log10f64 => ("log10", 1, fx.tcx.types.f64, types::F64),
345+
sym::log10f128 => ("log10f128", 1, fx.tcx.types.f128, types::F128),
346+
sym::fabsf16 => ("fabsf16", 1, fx.tcx.types.f16, types::F16),
328347
sym::fabsf32 => ("fabsf", 1, fx.tcx.types.f32, types::F32),
329348
sym::fabsf64 => ("fabs", 1, fx.tcx.types.f64, types::F64),
349+
sym::fabsf128 => ("fabsf128", 1, fx.tcx.types.f128, types::F128),
350+
sym::fmaf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16),
330351
sym::fmaf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32),
331352
sym::fmaf64 => ("fma", 3, fx.tcx.types.f64, types::F64),
353+
sym::fmaf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128),
332354
// FIXME: calling `fma` from libc without FMA target feature uses expensive software emulation
355+
sym::fmuladdf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f16
333356
sym::fmuladdf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f32
334357
sym::fmuladdf64 => ("fma", 3, fx.tcx.types.f64, types::F64), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f64
358+
sym::fmuladdf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f128
359+
sym::copysignf16 => ("copysignf16", 2, fx.tcx.types.f16, types::F16),
335360
sym::copysignf32 => ("copysignf", 2, fx.tcx.types.f32, types::F32),
336361
sym::copysignf64 => ("copysign", 2, fx.tcx.types.f64, types::F64),
362+
sym::copysignf128 => ("copysignf128", 2, fx.tcx.types.f128, types::F128),
363+
sym::floorf16 => ("floorf16", 1, fx.tcx.types.f16, types::F16),
337364
sym::floorf32 => ("floorf", 1, fx.tcx.types.f32, types::F32),
338365
sym::floorf64 => ("floor", 1, fx.tcx.types.f64, types::F64),
366+
sym::floorf128 => ("floorf128", 1, fx.tcx.types.f128, types::F128),
367+
sym::ceilf16 => ("ceilf16", 1, fx.tcx.types.f16, types::F16),
339368
sym::ceilf32 => ("ceilf", 1, fx.tcx.types.f32, types::F32),
340369
sym::ceilf64 => ("ceil", 1, fx.tcx.types.f64, types::F64),
370+
sym::ceilf128 => ("ceilf128", 1, fx.tcx.types.f128, types::F128),
371+
sym::truncf16 => ("truncf16", 1, fx.tcx.types.f16, types::F16),
341372
sym::truncf32 => ("truncf", 1, fx.tcx.types.f32, types::F32),
342373
sym::truncf64 => ("trunc", 1, fx.tcx.types.f64, types::F64),
374+
sym::truncf128 => ("truncf128", 1, fx.tcx.types.f128, types::F128),
375+
sym::round_ties_even_f16 => ("rintf16", 1, fx.tcx.types.f16, types::F16),
343376
sym::round_ties_even_f32 => ("rintf", 1, fx.tcx.types.f32, types::F32),
344377
sym::round_ties_even_f64 => ("rint", 1, fx.tcx.types.f64, types::F64),
378+
sym::round_ties_even_f128 => ("rintf128", 1, fx.tcx.types.f128, types::F128),
379+
sym::roundf16 => ("roundf16", 1, fx.tcx.types.f16, types::F16),
345380
sym::roundf32 => ("roundf", 1, fx.tcx.types.f32, types::F32),
346381
sym::roundf64 => ("round", 1, fx.tcx.types.f64, types::F64),
382+
sym::roundf128 => ("roundf128", 1, fx.tcx.types.f128, types::F128),
383+
sym::sinf16 => ("sinf16", 1, fx.tcx.types.f16, types::F16),
347384
sym::sinf32 => ("sinf", 1, fx.tcx.types.f32, types::F32),
348385
sym::sinf64 => ("sin", 1, fx.tcx.types.f64, types::F64),
386+
sym::sinf128 => ("sinf128", 1, fx.tcx.types.f128, types::F128),
387+
sym::cosf16 => ("cosf16", 1, fx.tcx.types.f16, types::F16),
349388
sym::cosf32 => ("cosf", 1, fx.tcx.types.f32, types::F32),
350389
sym::cosf64 => ("cos", 1, fx.tcx.types.f64, types::F64),
390+
sym::cosf128 => ("cosf128", 1, fx.tcx.types.f128, types::F128),
351391
_ => return false,
352392
};
353393

@@ -380,13 +420,26 @@ fn codegen_float_intrinsic_call<'tcx>(
380420
};
381421

382422
let layout = fx.layout_of(ty);
423+
// FIXME(bytecodealliance/wasmtime#8312): Use native Cranelift operations
424+
// for `f16` and `f128` once the lowerings have been implemented in Cranelift.
383425
let res = match intrinsic {
426+
sym::fmaf16 | sym::fmuladdf16 => {
427+
CValue::by_val(codegen_f16_f128::fma_f16(fx, args[0], args[1], args[2]), layout)
428+
}
384429
sym::fmaf32 | sym::fmaf64 | sym::fmuladdf32 | sym::fmuladdf64 => {
385430
CValue::by_val(fx.bcx.ins().fma(args[0], args[1], args[2]), layout)
386431
}
432+
sym::copysignf16 => {
433+
CValue::by_val(codegen_f16_f128::copysign_f16(fx, args[0], args[1]), layout)
434+
}
435+
sym::copysignf128 => {
436+
CValue::by_val(codegen_f16_f128::copysign_f128(fx, args[0], args[1]), layout)
437+
}
387438
sym::copysignf32 | sym::copysignf64 => {
388439
CValue::by_val(fx.bcx.ins().fcopysign(args[0], args[1]), layout)
389440
}
441+
sym::fabsf16 => CValue::by_val(codegen_f16_f128::abs_f16(fx, args[0]), layout),
442+
sym::fabsf128 => CValue::by_val(codegen_f16_f128::abs_f128(fx, args[0]), layout),
390443
sym::fabsf32
391444
| sym::fabsf64
392445
| sym::floorf32
@@ -416,12 +469,36 @@ fn codegen_float_intrinsic_call<'tcx>(
416469

417470
// These intrinsics aren't supported natively by Cranelift.
418471
// Lower them to a libcall.
419-
sym::powif32 | sym::powif64 => {
472+
sym::powif16 | sym::powif32 | sym::powif64 | sym::powif128 => {
473+
let temp;
474+
let (clif_ty, args) = if intrinsic == sym::powif16 {
475+
temp = [codegen_f16_f128::f16_to_f32(fx, args[0]), args[1]];
476+
(types::F32, temp.as_slice())
477+
} else {
478+
(clif_ty, args)
479+
};
420480
let input_tys: Vec<_> =
421481
vec![AbiParam::new(clif_ty), lib_call_arg_param(fx.tcx, types::I32, true)];
422482
let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0];
483+
let ret_val = if intrinsic == sym::powif16 {
484+
codegen_f16_f128::f32_to_f16(fx, ret_val)
485+
} else {
486+
ret_val
487+
};
423488
CValue::by_val(ret_val, fx.layout_of(ty))
424489
}
490+
sym::powf16 => {
491+
// FIXME(f16_f128): Rust `compiler-builtins` doesn't export `powf16` yet.
492+
let x = codegen_f16_f128::f16_to_f32(fx, args[0]);
493+
let y = codegen_f16_f128::f16_to_f32(fx, args[1]);
494+
let ret_val = fx.lib_call(
495+
"powf",
496+
vec![AbiParam::new(types::F32), AbiParam::new(types::F32)],
497+
vec![AbiParam::new(types::F32)],
498+
&[x, y],
499+
)[0];
500+
CValue::by_val(codegen_f16_f128::f32_to_f16(fx, ret_val), fx.layout_of(ty))
501+
}
425502
_ => {
426503
let input_tys: Vec<_> = args.iter().map(|_| AbiParam::new(clif_ty)).collect();
427504
let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0];

0 commit comments

Comments
 (0)