@@ -68,57 +68,31 @@ entry:
68
68
69
69
70
70
define double @streaming_caller_nonstreaming_callee (double %x ) nounwind noinline optnone "aarch64_pstate_sm_enabled" {
71
- ; CHECK-FISEL-LABEL: streaming_caller_nonstreaming_callee:
72
- ; CHECK-FISEL: // %bb.0: // %entry
73
- ; CHECK-FISEL-NEXT: sub sp, sp, #96
74
- ; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
75
- ; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
76
- ; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
77
- ; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
78
- ; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
79
- ; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
80
- ; CHECK-FISEL-NEXT: smstop sm
81
- ; CHECK-FISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
82
- ; CHECK-FISEL-NEXT: bl normal_callee
83
- ; CHECK-FISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
84
- ; CHECK-FISEL-NEXT: smstart sm
85
- ; CHECK-FISEL-NEXT: adrp x8, .LCPI1_0
86
- ; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI1_0]
87
- ; CHECK-FISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
88
- ; CHECK-FISEL-NEXT: fadd d0, d1, d0
89
- ; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
90
- ; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
91
- ; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
92
- ; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
93
- ; CHECK-FISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
94
- ; CHECK-FISEL-NEXT: add sp, sp, #96
95
- ; CHECK-FISEL-NEXT: ret
96
- ;
97
- ; CHECK-GISEL-LABEL: streaming_caller_nonstreaming_callee:
98
- ; CHECK-GISEL: // %bb.0: // %entry
99
- ; CHECK-GISEL-NEXT: sub sp, sp, #96
100
- ; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
101
- ; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
102
- ; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
103
- ; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
104
- ; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
105
- ; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
106
- ; CHECK-GISEL-NEXT: smstop sm
107
- ; CHECK-GISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
108
- ; CHECK-GISEL-NEXT: bl normal_callee
109
- ; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
110
- ; CHECK-GISEL-NEXT: smstart sm
111
- ; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
112
- ; CHECK-GISEL-NEXT: fmov d0, x8
113
- ; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
114
- ; CHECK-GISEL-NEXT: fadd d0, d1, d0
115
- ; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
116
- ; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
117
- ; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
118
- ; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
119
- ; CHECK-GISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
120
- ; CHECK-GISEL-NEXT: add sp, sp, #96
121
- ; CHECK-GISEL-NEXT: ret
71
+ ; CHECK-COMMON-LABEL: streaming_caller_nonstreaming_callee:
72
+ ; CHECK-COMMON: // %bb.0: // %entry
73
+ ; CHECK-COMMON-NEXT: sub sp, sp, #96
74
+ ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
75
+ ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
76
+ ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
77
+ ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
78
+ ; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
79
+ ; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
80
+ ; CHECK-COMMON-NEXT: smstop sm
81
+ ; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
82
+ ; CHECK-COMMON-NEXT: bl normal_callee
83
+ ; CHECK-COMMON-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
84
+ ; CHECK-COMMON-NEXT: smstart sm
85
+ ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
86
+ ; CHECK-COMMON-NEXT: fmov d0, x8
87
+ ; CHECK-COMMON-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
88
+ ; CHECK-COMMON-NEXT: fadd d0, d1, d0
89
+ ; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
90
+ ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
91
+ ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
92
+ ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
93
+ ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
94
+ ; CHECK-COMMON-NEXT: add sp, sp, #96
95
+ ; CHECK-COMMON-NEXT: ret
122
96
entry:
123
97
%call = call double @normal_callee (double %x )
124
98
%add = fadd double %call , 4 .200000e+01
@@ -358,64 +332,95 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
358
332
ret fp128 %res
359
333
}
360
334
361
- ; FIXME: As above this should use Selection DAG to make sure the libcall call is lowered correctly.
335
+ ; As above, this should use Selection DAG to make sure the libcall is lowered correctly.
362
336
define double @frem_call_za (double %a , double %b ) "aarch64_pstate_za_shared" nounwind {
; Test input: a double-precision frem in a function carrying the
; "aarch64_pstate_za_shared" attribute.
; The '-' expectations below are the previous ones, split across the CHECK-FISEL
; and CHECK-GISEL prefixes; the CHECK-GISEL set expected only a save of x30
; around `bl fmod`. The '+' expectations use the single CHECK-COMMON prefix and
; expect TPIDR2_EL0 to be written before `bl fmod`, followed by `smstart za` and
; a call to __arm_tpidr2_restore that is skipped when TPIDR2_EL0 reads back
; non-zero (cbnz).
; NOTE(review): which llc invocations each prefix covers depends on the RUN
; lines, which are not visible in this hunk - confirm there.
363
- ; CHECK-FISEL-LABEL: frem_call_za:
364
- ; CHECK-FISEL: // %bb.0:
365
- ; CHECK-FISEL-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
366
- ; CHECK-FISEL-NEXT: mov x29, sp
367
- ; CHECK-FISEL-NEXT: sub sp, sp, #16
368
- ; CHECK-FISEL-NEXT: rdsvl x8, #1
369
- ; CHECK-FISEL-NEXT: mov x9, sp
370
- ; CHECK-FISEL-NEXT: mul x8, x8, x8
371
- ; CHECK-FISEL-NEXT: sub x9, x9, x8
372
- ; CHECK-FISEL-NEXT: mov sp, x9
373
- ; CHECK-FISEL-NEXT: stur x9, [x29, #-16]
374
- ; CHECK-FISEL-NEXT: sub x9, x29, #16
375
- ; CHECK-FISEL-NEXT: sturh w8, [x29, #-8]
376
- ; CHECK-FISEL-NEXT: msr TPIDR2_EL0, x9
377
- ; CHECK-FISEL-NEXT: bl fmod
378
- ; CHECK-FISEL-NEXT: smstart za
379
- ; CHECK-FISEL-NEXT: mrs x8, TPIDR2_EL0
380
- ; CHECK-FISEL-NEXT: sub x0, x29, #16
381
- ; CHECK-FISEL-NEXT: cbnz x8, .LBB10_2
382
- ; CHECK-FISEL-NEXT: // %bb.1:
383
- ; CHECK-FISEL-NEXT: bl __arm_tpidr2_restore
384
- ; CHECK-FISEL-NEXT: .LBB10_2:
385
- ; CHECK-FISEL-NEXT: msr TPIDR2_EL0, xzr
386
- ; CHECK-FISEL-NEXT: mov sp, x29
387
- ; CHECK-FISEL-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
388
- ; CHECK-FISEL-NEXT: ret
389
- ;
390
- ; CHECK-GISEL-LABEL: frem_call_za:
391
- ; CHECK-GISEL: // %bb.0:
392
- ; CHECK-GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
393
- ; CHECK-GISEL-NEXT: bl fmod
394
- ; CHECK-GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
395
- ; CHECK-GISEL-NEXT: ret
337
+ ; CHECK-COMMON-LABEL: frem_call_za:
338
+ ; CHECK-COMMON: // %bb.0:
339
+ ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
340
+ ; CHECK-COMMON-NEXT: mov x29, sp
341
+ ; CHECK-COMMON-NEXT: sub sp, sp, #16
342
+ ; CHECK-COMMON-NEXT: rdsvl x8, #1
343
+ ; CHECK-COMMON-NEXT: mov x9, sp
344
+ ; CHECK-COMMON-NEXT: mul x8, x8, x8
345
+ ; CHECK-COMMON-NEXT: sub x9, x9, x8
346
+ ; CHECK-COMMON-NEXT: mov sp, x9
347
+ ; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
348
+ ; CHECK-COMMON-NEXT: sub x9, x29, #16
349
+ ; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
350
+ ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
351
+ ; CHECK-COMMON-NEXT: bl fmod
352
+ ; CHECK-COMMON-NEXT: smstart za
353
+ ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
354
+ ; CHECK-COMMON-NEXT: sub x0, x29, #16
355
+ ; CHECK-COMMON-NEXT: cbnz x8, .LBB10_2
356
+ ; CHECK-COMMON-NEXT: // %bb.1:
357
+ ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
358
+ ; CHECK-COMMON-NEXT: .LBB10_2:
359
+ ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
360
+ ; CHECK-COMMON-NEXT: mov sp, x29
361
+ ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
362
+ ; CHECK-COMMON-NEXT: ret
; Operation under test: the expectations above show it emitted as a call to fmod.
396
363
%res = frem double %a , %b
397
364
ret double %res
398
365
}
399
366
400
- ; FIXME: As above this should use Selection DAG to make sure the libcall is lowered correctly.
367
+ ; As above, this should use Selection DAG to make sure the libcall is lowered correctly.
401
368
define float @frem_call_sm (float %a , float %b ) "aarch64_pstate_sm_enabled" nounwind {
; Test input: a single-precision frem in a function carrying the
; "aarch64_pstate_sm_enabled" attribute.
; Previous expectations ('-' lines): only x30 saved/restored around `bl fmodf`.
; Updated expectations ('+' lines): d8-d15 and x30 are spilled, s0/s1 are
; stored before `smstop sm` and reloaded after it, and the s0 result is stored
; before `smstart sm` and reloaded after it.
402
369
; CHECK-COMMON-LABEL: frem_call_sm:
403
370
; CHECK-COMMON: // %bb.0:
404
- ; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
371
+ ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
372
+ ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
373
+ ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
374
+ ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
375
+ ; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
376
+ ; CHECK-COMMON-NEXT: stp s0, s1, [sp, #72] // 8-byte Folded Spill
377
+ ; CHECK-COMMON-NEXT: smstop sm
378
+ ; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #72] // 8-byte Folded Reload
405
379
; CHECK-COMMON-NEXT: bl fmodf
406
- ; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
380
+ ; CHECK-COMMON-NEXT: str s0, [sp, #76] // 4-byte Folded Spill
381
+ ; CHECK-COMMON-NEXT: smstart sm
382
+ ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
383
+ ; CHECK-COMMON-NEXT: ldr s0, [sp, #76] // 4-byte Folded Reload
384
+ ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
385
+ ; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
386
+ ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
387
+ ; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
407
388
; CHECK-COMMON-NEXT: ret
; Operation under test: the expectations above show it emitted as a call to fmodf.
408
389
%res = frem float %a , %b
409
390
ret float %res
410
391
}
411
392
412
- ; FIXME: As above this should use Selection DAG to make sure the libcall is lowered correctly.
393
+ ; As above, this should use Selection DAG to make sure the libcall is lowered correctly.
413
394
define float @frem_call_sm_compat (float %a , float %b ) "aarch64_pstate_sm_compatible" nounwind {
414
395
; CHECK-COMMON-LABEL: frem_call_sm_compat:
415
396
; CHECK-COMMON: // %bb.0:
416
- ; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
397
+ ; CHECK-COMMON-NEXT: sub sp, sp, #96
398
+ ; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
399
+ ; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
400
+ ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
401
+ ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
402
+ ; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
403
+ ; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
404
+ ; CHECK-COMMON-NEXT: bl __arm_sme_state
405
+ ; CHECK-COMMON-NEXT: and x19, x0, #0x1
406
+ ; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_2
407
+ ; CHECK-COMMON-NEXT: // %bb.1:
408
+ ; CHECK-COMMON-NEXT: smstop sm
409
+ ; CHECK-COMMON-NEXT: .LBB12_2:
410
+ ; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
417
411
; CHECK-COMMON-NEXT: bl fmodf
418
- ; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
412
+ ; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
413
+ ; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_4
414
+ ; CHECK-COMMON-NEXT: // %bb.3:
415
+ ; CHECK-COMMON-NEXT: smstart sm
416
+ ; CHECK-COMMON-NEXT: .LBB12_4:
417
+ ; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
418
+ ; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
419
+ ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
420
+ ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
421
+ ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
422
+ ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
423
+ ; CHECK-COMMON-NEXT: add sp, sp, #96
419
424
; CHECK-COMMON-NEXT: ret
420
425
%res = frem float %a , %b
421
426
ret float %res
0 commit comments