Skip to content

Commit fe8ac0f

Browse files
wenju-he authored and yubingex007-a11y committed
[x86] Fix Intel OpenCL builtin CalleeSavedRegs on skx
Summary: Align with the AVX512 builtin implementations, some of which do not preserve RDI.
Reviewers: yubing, tianqing, craig.topper
Reviewed By: craig.topper
Subscribers: yaxunl, Anastasia, hiraditya
Differential Revision: https://reviews.llvm.org/D77032
1 parent 71f1ab5 commit fe8ac0f

File tree

2 files changed

+5
-9
lines changed

2 files changed

+5
-9
lines changed

llvm/lib/Target/X86/X86CallingConv.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1145,7 +1145,7 @@ def CSR_64_Intel_OCL_BI : CalleeSavedRegs<(add CSR_64,
11451145
def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64,
11461146
(sequence "YMM%u", 8, 15))>;
11471147

1148-
def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RDI, RSI, R14, R15,
1148+
def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RSI, R14, R15,
11491149
(sequence "ZMM%u", 16, 31),
11501150
K4, K5, K6, K7)>;
11511151

llvm/test/CodeGen/X86/avx512-intel-ocl.ll

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -294,8 +294,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
294294
; X64-KNL-LABEL: test_prolog_epilog:
295295
; X64-KNL: ## %bb.0:
296296
; X64-KNL-NEXT: pushq %rsi
297-
; X64-KNL-NEXT: pushq %rdi
298-
; X64-KNL-NEXT: subq $1064, %rsp ## imm = 0x428
297+
; X64-KNL-NEXT: subq $1072, %rsp ## imm = 0x430
299298
; X64-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
300299
; X64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
301300
; X64-KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
@@ -337,16 +336,14 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
337336
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
338337
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
339338
; X64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
340-
; X64-KNL-NEXT: addq $1064, %rsp ## imm = 0x428
341-
; X64-KNL-NEXT: popq %rdi
339+
; X64-KNL-NEXT: addq $1072, %rsp ## imm = 0x430
342340
; X64-KNL-NEXT: popq %rsi
343341
; X64-KNL-NEXT: retq
344342
;
345343
; X64-SKX-LABEL: test_prolog_epilog:
346344
; X64-SKX: ## %bb.0:
347345
; X64-SKX-NEXT: pushq %rsi
348-
; X64-SKX-NEXT: pushq %rdi
349-
; X64-SKX-NEXT: subq $1064, %rsp ## imm = 0x428
346+
; X64-SKX-NEXT: subq $1072, %rsp ## imm = 0x430
350347
; X64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
351348
; X64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
352349
; X64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
@@ -388,8 +385,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
388385
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 8-byte Reload
389386
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 8-byte Reload
390387
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload
391-
; X64-SKX-NEXT: addq $1064, %rsp ## imm = 0x428
392-
; X64-SKX-NEXT: popq %rdi
388+
; X64-SKX-NEXT: addq $1072, %rsp ## imm = 0x430
393389
; X64-SKX-NEXT: popq %rsi
394390
; X64-SKX-NEXT: retq
395391
%c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)

0 commit comments

Comments
 (0)