Skip to content

Commit 13c2514

Browse files
committed
[AArch64] Disable GlobalISel/FastISel for more SME functions
The patch D136361 disabled GlobalISel and FastISel for some SME functions, as the saving and restoring of SM is not yet handled. There were several tests added for fp128 fadd, which will be expanded to a libcall, that only happened to work by accident and did not handle other cases such as f32/f64 frem libcalls. This extends the cases where GlobalISel / FastISel are disabled for functions with SME attributes, under the assumption that it is difficult to reliably tell what will become a libcall, and so we should fall back for all such functions until GlobalISel and/or FastISel can handle them. Differential Revision: https://reviews.llvm.org/D158490
1 parent 08f034f commit 13c2514

File tree

3 files changed

+100
-95
lines changed

3 files changed

+100
-95
lines changed

llvm/lib/Target/AArch64/AArch64FastISel.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5187,8 +5187,8 @@ FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
51875187
const TargetLibraryInfo *LibInfo) {
51885188

51895189
SMEAttrs CallerAttrs(*FuncInfo.Fn);
5190-
if (CallerAttrs.hasZAState() ||
5191-
(!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody()))
5190+
if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() ||
5191+
CallerAttrs.hasStreamingCompatibleInterface())
51925192
return nullptr;
51935193
return new AArch64FastISel(FuncInfo, LibInfo);
51945194
}

llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -532,8 +532,8 @@ bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
532532
}
533533

534534
SMEAttrs Attrs(F);
535-
if (Attrs.hasNewZAInterface() ||
536-
(!Attrs.hasStreamingInterface() && Attrs.hasStreamingBody()))
535+
if (Attrs.hasZAState() || Attrs.hasStreamingInterfaceOrBody() ||
536+
Attrs.hasStreamingCompatibleInterface())
537537
return true;
538538

539539
return false;

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Lines changed: 96 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -68,57 +68,31 @@ entry:
6868

6969

7070
define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" {
71-
; CHECK-FISEL-LABEL: streaming_caller_nonstreaming_callee:
72-
; CHECK-FISEL: // %bb.0: // %entry
73-
; CHECK-FISEL-NEXT: sub sp, sp, #96
74-
; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
75-
; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
76-
; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
77-
; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
78-
; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
79-
; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
80-
; CHECK-FISEL-NEXT: smstop sm
81-
; CHECK-FISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
82-
; CHECK-FISEL-NEXT: bl normal_callee
83-
; CHECK-FISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
84-
; CHECK-FISEL-NEXT: smstart sm
85-
; CHECK-FISEL-NEXT: adrp x8, .LCPI1_0
86-
; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI1_0]
87-
; CHECK-FISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
88-
; CHECK-FISEL-NEXT: fadd d0, d1, d0
89-
; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
90-
; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
91-
; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
92-
; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
93-
; CHECK-FISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
94-
; CHECK-FISEL-NEXT: add sp, sp, #96
95-
; CHECK-FISEL-NEXT: ret
96-
;
97-
; CHECK-GISEL-LABEL: streaming_caller_nonstreaming_callee:
98-
; CHECK-GISEL: // %bb.0: // %entry
99-
; CHECK-GISEL-NEXT: sub sp, sp, #96
100-
; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
101-
; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
102-
; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
103-
; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
104-
; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
105-
; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
106-
; CHECK-GISEL-NEXT: smstop sm
107-
; CHECK-GISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
108-
; CHECK-GISEL-NEXT: bl normal_callee
109-
; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
110-
; CHECK-GISEL-NEXT: smstart sm
111-
; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
112-
; CHECK-GISEL-NEXT: fmov d0, x8
113-
; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
114-
; CHECK-GISEL-NEXT: fadd d0, d1, d0
115-
; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
116-
; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
117-
; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
118-
; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
119-
; CHECK-GISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
120-
; CHECK-GISEL-NEXT: add sp, sp, #96
121-
; CHECK-GISEL-NEXT: ret
71+
; CHECK-COMMON-LABEL: streaming_caller_nonstreaming_callee:
72+
; CHECK-COMMON: // %bb.0: // %entry
73+
; CHECK-COMMON-NEXT: sub sp, sp, #96
74+
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
75+
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
76+
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
77+
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
78+
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
79+
; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
80+
; CHECK-COMMON-NEXT: smstop sm
81+
; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
82+
; CHECK-COMMON-NEXT: bl normal_callee
83+
; CHECK-COMMON-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
84+
; CHECK-COMMON-NEXT: smstart sm
85+
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
86+
; CHECK-COMMON-NEXT: fmov d0, x8
87+
; CHECK-COMMON-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
88+
; CHECK-COMMON-NEXT: fadd d0, d1, d0
89+
; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
90+
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
91+
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
92+
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
93+
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
94+
; CHECK-COMMON-NEXT: add sp, sp, #96
95+
; CHECK-COMMON-NEXT: ret
12296
entry:
12397
%call = call double @normal_callee(double %x)
12498
%add = fadd double %call, 4.200000e+01
@@ -358,64 +332,95 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
358332
ret fp128 %res
359333
}
360334

361-
; FIXME: As above this should use Selection DAG to make sure the libcall call is lowered correctly.
335+
; As above, this should use Selection DAG to make sure the libcall is lowered correctly.
362336
define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nounwind {
363-
; CHECK-FISEL-LABEL: frem_call_za:
364-
; CHECK-FISEL: // %bb.0:
365-
; CHECK-FISEL-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
366-
; CHECK-FISEL-NEXT: mov x29, sp
367-
; CHECK-FISEL-NEXT: sub sp, sp, #16
368-
; CHECK-FISEL-NEXT: rdsvl x8, #1
369-
; CHECK-FISEL-NEXT: mov x9, sp
370-
; CHECK-FISEL-NEXT: mul x8, x8, x8
371-
; CHECK-FISEL-NEXT: sub x9, x9, x8
372-
; CHECK-FISEL-NEXT: mov sp, x9
373-
; CHECK-FISEL-NEXT: stur x9, [x29, #-16]
374-
; CHECK-FISEL-NEXT: sub x9, x29, #16
375-
; CHECK-FISEL-NEXT: sturh w8, [x29, #-8]
376-
; CHECK-FISEL-NEXT: msr TPIDR2_EL0, x9
377-
; CHECK-FISEL-NEXT: bl fmod
378-
; CHECK-FISEL-NEXT: smstart za
379-
; CHECK-FISEL-NEXT: mrs x8, TPIDR2_EL0
380-
; CHECK-FISEL-NEXT: sub x0, x29, #16
381-
; CHECK-FISEL-NEXT: cbnz x8, .LBB10_2
382-
; CHECK-FISEL-NEXT: // %bb.1:
383-
; CHECK-FISEL-NEXT: bl __arm_tpidr2_restore
384-
; CHECK-FISEL-NEXT: .LBB10_2:
385-
; CHECK-FISEL-NEXT: msr TPIDR2_EL0, xzr
386-
; CHECK-FISEL-NEXT: mov sp, x29
387-
; CHECK-FISEL-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
388-
; CHECK-FISEL-NEXT: ret
389-
;
390-
; CHECK-GISEL-LABEL: frem_call_za:
391-
; CHECK-GISEL: // %bb.0:
392-
; CHECK-GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
393-
; CHECK-GISEL-NEXT: bl fmod
394-
; CHECK-GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
395-
; CHECK-GISEL-NEXT: ret
337+
; CHECK-COMMON-LABEL: frem_call_za:
338+
; CHECK-COMMON: // %bb.0:
339+
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
340+
; CHECK-COMMON-NEXT: mov x29, sp
341+
; CHECK-COMMON-NEXT: sub sp, sp, #16
342+
; CHECK-COMMON-NEXT: rdsvl x8, #1
343+
; CHECK-COMMON-NEXT: mov x9, sp
344+
; CHECK-COMMON-NEXT: mul x8, x8, x8
345+
; CHECK-COMMON-NEXT: sub x9, x9, x8
346+
; CHECK-COMMON-NEXT: mov sp, x9
347+
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
348+
; CHECK-COMMON-NEXT: sub x9, x29, #16
349+
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
350+
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
351+
; CHECK-COMMON-NEXT: bl fmod
352+
; CHECK-COMMON-NEXT: smstart za
353+
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
354+
; CHECK-COMMON-NEXT: sub x0, x29, #16
355+
; CHECK-COMMON-NEXT: cbnz x8, .LBB10_2
356+
; CHECK-COMMON-NEXT: // %bb.1:
357+
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
358+
; CHECK-COMMON-NEXT: .LBB10_2:
359+
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
360+
; CHECK-COMMON-NEXT: mov sp, x29
361+
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
362+
; CHECK-COMMON-NEXT: ret
396363
%res = frem double %a, %b
397364
ret double %res
398365
}
399366

400-
; FIXME: As above this should use Selection DAG to make sure the libcall is lowered correctly.
367+
; As above this should use Selection DAG to make sure the libcall is lowered correctly.
401368
define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind {
402369
; CHECK-COMMON-LABEL: frem_call_sm:
403370
; CHECK-COMMON: // %bb.0:
404-
; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
371+
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
372+
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
373+
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
374+
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
375+
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
376+
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #72] // 8-byte Folded Spill
377+
; CHECK-COMMON-NEXT: smstop sm
378+
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #72] // 8-byte Folded Reload
405379
; CHECK-COMMON-NEXT: bl fmodf
406-
; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
380+
; CHECK-COMMON-NEXT: str s0, [sp, #76] // 4-byte Folded Spill
381+
; CHECK-COMMON-NEXT: smstart sm
382+
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
383+
; CHECK-COMMON-NEXT: ldr s0, [sp, #76] // 4-byte Folded Reload
384+
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
385+
; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
386+
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
387+
; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
407388
; CHECK-COMMON-NEXT: ret
408389
%res = frem float %a, %b
409390
ret float %res
410391
}
411392

412-
; FIXME: As above this should use Selection DAG to make sure the libcall is lowered correctly.
393+
; As above this should use Selection DAG to make sure the libcall is lowered correctly.
413394
define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compatible" nounwind {
414395
; CHECK-COMMON-LABEL: frem_call_sm_compat:
415396
; CHECK-COMMON: // %bb.0:
416-
; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
397+
; CHECK-COMMON-NEXT: sub sp, sp, #96
398+
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
399+
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
400+
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
401+
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
402+
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
403+
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
404+
; CHECK-COMMON-NEXT: bl __arm_sme_state
405+
; CHECK-COMMON-NEXT: and x19, x0, #0x1
406+
; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_2
407+
; CHECK-COMMON-NEXT: // %bb.1:
408+
; CHECK-COMMON-NEXT: smstop sm
409+
; CHECK-COMMON-NEXT: .LBB12_2:
410+
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
417411
; CHECK-COMMON-NEXT: bl fmodf
418-
; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
412+
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
413+
; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_4
414+
; CHECK-COMMON-NEXT: // %bb.3:
415+
; CHECK-COMMON-NEXT: smstart sm
416+
; CHECK-COMMON-NEXT: .LBB12_4:
417+
; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
418+
; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
419+
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
420+
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
421+
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
422+
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
423+
; CHECK-COMMON-NEXT: add sp, sp, #96
419424
; CHECK-COMMON-NEXT: ret
420425
%res = frem float %a, %b
421426
ret float %res

0 commit comments

Comments
 (0)