Skip to content

Commit 11f4f16

Browse files
committed
Implement a hack for an intrinsic mapping where we need to output a different builtin based on an argument
1 parent 1e8354a commit 11f4f16

File tree

2 files changed

+33
-2
lines changed

2 files changed

+33
-2
lines changed

src/builder.rs

+1
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
370370
let args = {
371371
let function_address_names = self.function_address_names.borrow();
372372
let original_function_name = function_address_names.get(&func_ptr);
373+
func_ptr = llvm::adjust_function(self.context, &func_name, func_ptr, args);
373374
llvm::adjust_intrinsic_arguments(
374375
self,
375376
gcc_func,

src/intrinsic/llvm.rs

+32-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,42 @@
11
use std::borrow::Cow;
22

3-
use gccjit::CType;
3+
use gccjit::{CType, Context};
44
use gccjit::{Function, FunctionPtrType, RValue, ToRValue, UnaryOp};
55
use rustc_codegen_ssa::traits::BuilderMethods;
66

77
use crate::builder::Builder;
88
use crate::context::CodegenCx;
99

10+
/// Substitutes a different GCC builtin for `func_ptr` when the call arguments require it.
///
/// `func_name` is the name of the GCC builtin currently selected for the call, `func_ptr`
/// is the function pointer that would be called, and `args` are the call arguments.
///
/// Returns the address of the replacement builtin when `func_name` is one of the
/// special-cased `mask3` builtins and the fourth argument's debug rendering ends in `255`;
/// otherwise returns `func_ptr` unchanged.
pub fn adjust_function<'gcc>(
    context: &'gcc Context<'gcc>,
    func_name: &str,
    func_ptr: RValue<'gcc>,
    args: &[RValue<'gcc>],
) -> RValue<'gcc> {
    // FIXME: we should not need this hack: this is required because both _mm_fcmadd_sch
    // and _mm_mask3_fcmadd_round_sch call llvm.x86.avx512fp16.mask.vfcmadd.csh and we
    // seem to need to map this one LLVM intrinsic to 2 different GCC builtins.
    let replacement_name = match func_name {
        "__builtin_ia32_vfcmaddcsh_mask3_round" => "__builtin_ia32_vfcmaddcsh_mask_round",
        "__builtin_ia32_vfmaddcsh_mask3_round" => "__builtin_ia32_vfmaddcsh_mask_round",
        // Every other builtin is called as-is.
        _ => return func_ptr,
    };

    // The decision is made by inspecting the textual (Debug) form of the fourth argument,
    // since that is the only view of its value used here. A trailing "255" presumably
    // denotes an all-ones 8-bit mask -- TODO confirm against the intrinsic definitions.
    // `get` is used instead of indexing so a short argument list keeps the original
    // function pointer rather than panicking.
    let wants_mask_variant =
        args.get(3).is_some_and(|arg| format!("{:?}", arg).ends_with("255"));

    if wants_mask_variant {
        context.get_target_builtin_function(replacement_name).get_address(None)
    } else {
        func_ptr
    }
}
39+
1040
pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
1141
builder: &Builder<'a, 'gcc, 'tcx>,
1242
gcc_func: FunctionPtrType<'gcc>,
@@ -1198,7 +1228,7 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
11981228
"llvm.x86.avx512fp16.mask.vfcmul.csh" => "__builtin_ia32_vfcmulcsh_mask_round",
11991229
"llvm.x86.avx512fp16.mask.vfmadd.cph.512" => "__builtin_ia32_vfmaddcph512_mask3_round",
12001230
"llvm.x86.avx512fp16.maskz.vfmadd.cph.512" => "__builtin_ia32_vfmaddcph512_maskz_round",
1201-
"llvm.x86.avx512fp16.mask.vfmadd.csh" => "__builtin_ia32_vfmaddcsh_mask_round",
1231+
"llvm.x86.avx512fp16.mask.vfmadd.csh" => "__builtin_ia32_vfmaddcsh_mask3_round",
12021232
"llvm.x86.avx512fp16.maskz.vfmadd.csh" => "__builtin_ia32_vfmaddcsh_maskz_round",
12031233
"llvm.x86.avx512fp16.mask.vfcmadd.cph.512" => "__builtin_ia32_vfcmaddcph512_mask3_round",
12041234
"llvm.x86.avx512fp16.maskz.vfcmadd.cph.512" => "__builtin_ia32_vfcmaddcph512_maskz_round",

0 commit comments

Comments
 (0)