Skip to content

Commit 24c84bd

Browse files
[AArch64] Async unwind - Fix MTE codegen emitting frame adjustments in a loop
When untagging the stack, the compiler may emit a sequence like:
```
.LBB0_1:
        st2g    sp, [sp], #32
        sub     x8, x8, #32
        cbnz    x8, .LBB0_1
        stg     sp, [sp], #16
```
These stack adjustments cannot be described by CFI instructions.

This patch disables merging of the SP update with untagging, i.e. makes the compiler use an additional scratch register (there should be plenty available at this point as we are in the epilogue) and generate:
```
        mov     x9, sp
        mov     x8, #256
        stg     x9, [x9], #16
.LBB0_1:
        sub     x8, x8, #32
        st2g    x9, [x9], #32
        cbnz    x8, .LBB0_1
        add     sp, sp, #272
```
Merging is disabled only when we need to generate asynchronous unwind tables.

Reviewed By: eugenis

Differential Revision: https://reviews.llvm.org/D114548
1 parent 5865a74 commit 24c84bd

File tree

3 files changed

+58
-22
lines changed

3 files changed

+58
-22
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3235,7 +3235,7 @@ class TagStoreEdit {
32353235
// instructions. May skip if the replacement is not profitable. May invalidate
32363236
// the input iterator and replace it with a valid one.
32373237
void emitCode(MachineBasicBlock::iterator &InsertI,
3238-
const AArch64FrameLowering *TFI, bool IsLast);
3238+
const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);
32393239
};
32403240

32413241
void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
@@ -3374,7 +3374,8 @@ void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
33743374
}
33753375

33763376
void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
3377-
const AArch64FrameLowering *TFI, bool IsLast) {
3377+
const AArch64FrameLowering *TFI,
3378+
bool TryMergeSPUpdate) {
33783379
if (TagStores.empty())
33793380
return;
33803381
TagStoreInstr &FirstTagStore = TagStores[0];
@@ -3404,8 +3405,8 @@ void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
34043405
emitUnrolled(InsertI);
34053406
} else {
34063407
MachineInstr *UpdateInstr = nullptr;
3407-
int64_t TotalOffset;
3408-
if (IsLast) {
3408+
int64_t TotalOffset = 0;
3409+
if (TryMergeSPUpdate) {
34093410
// See if we can merge base register update into the STGloop.
34103411
// This is done in AArch64LoadStoreOptimizer for "normal" stores,
34113412
// but STGloop is way too unusual for that, and also it only
@@ -3550,15 +3551,19 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
35503551
for (auto &Instr : Instrs) {
35513552
if (EndOffset && *EndOffset != Instr.Offset) {
35523553
// Found a gap.
3553-
TSE.emitCode(InsertI, TFI, /*IsLast = */ false);
3554+
TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */ false);
35543555
TSE.clear();
35553556
}
35563557

35573558
TSE.addInstruction(Instr);
35583559
EndOffset = Instr.Offset + Instr.Size;
35593560
}
35603561

3561-
TSE.emitCode(InsertI, TFI, /*IsLast = */ true);
3562+
// Multiple FP/SP updates in a loop cannot be described by CFI instructions.
3563+
TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */
3564+
!MBB->getParent()
3565+
->getInfo<AArch64FunctionInfo>()
3566+
->needsAsyncDwarfUnwindInfo());
35623567

35633568
return InsertI;
35643569
}

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -589,23 +589,31 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
589589

590590
// Create a scratch register for the frame index elimination in an instruction.
591591
// This function has special handling of stack tagging loop pseudos, in which
592-
// case it can also change the instruction opcode (but not the operands).
592+
// case it can also change the instruction opcode.
593593
static Register
594-
createScratchRegisterForInstruction(MachineInstr &MI,
594+
createScratchRegisterForInstruction(MachineInstr &MI, unsigned FIOperandNum,
595595
const AArch64InstrInfo *TII) {
596596
// ST*Gloop have a reserved scratch register in operand 1. Use it, and also
597597
// replace the instruction with the writeback variant because it will now
598598
// satisfy the operand constraints for it.
599-
if (MI.getOpcode() == AArch64::STGloop) {
600-
MI.setDesc(TII->get(AArch64::STGloop_wback));
601-
return MI.getOperand(1).getReg();
602-
} else if (MI.getOpcode() == AArch64::STZGloop) {
603-
MI.setDesc(TII->get(AArch64::STZGloop_wback));
604-
return MI.getOperand(1).getReg();
599+
Register ScratchReg;
600+
if (MI.getOpcode() == AArch64::STGloop ||
601+
MI.getOpcode() == AArch64::STZGloop) {
602+
assert(FIOperandNum == 3 &&
603+
"Wrong frame index operand for STGloop/STZGloop");
604+
unsigned Op = MI.getOpcode() == AArch64::STGloop ? AArch64::STGloop_wback
605+
: AArch64::STZGloop_wback;
606+
ScratchReg = MI.getOperand(1).getReg();
607+
MI.getOperand(3).ChangeToRegister(ScratchReg, false, false, true);
608+
MI.setDesc(TII->get(Op));
609+
MI.tieOperands(1, 3);
605610
} else {
606-
return MI.getMF()->getRegInfo().createVirtualRegister(
607-
&AArch64::GPR64RegClass);
611+
ScratchReg =
612+
MI.getMF()->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
613+
MI.getOperand(FIOperandNum)
614+
.ChangeToRegister(ScratchReg, false, false, true);
608615
}
616+
return ScratchReg;
609617
}
610618

611619
void AArch64RegisterInfo::getOffsetOpcodes(
@@ -722,9 +730,9 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
722730
// If we get here, the immediate doesn't fit into the instruction. We folded
723731
// as much as possible above. Handle the rest, providing a register that is
724732
// SP+LargeImm.
725-
Register ScratchReg = createScratchRegisterForInstruction(MI, TII);
733+
Register ScratchReg =
734+
createScratchRegisterForInstruction(MI, FIOperandNum, TII);
726735
emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
727-
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
728736
}
729737

730738
unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,

llvm/test/CodeGen/AArch64/settag.ll

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,21 +146,44 @@ entry:
146146
ret void
147147
}
148148

149-
define void @stg_alloca17() uwtable {
149+
define void @stg_alloca17() nounwind {
150150
; CHECK-LABEL: stg_alloca17:
151151
; CHECK: // %bb.0: // %entry
152152
; CHECK-NEXT: sub sp, sp, #288
153-
; CHECK-NEXT: .cfi_def_cfa_offset 288
154-
; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill
155-
; CHECK-NEXT: .cfi_offset w29, -16
156153
; CHECK-NEXT: mov x8, #256
154+
; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill
157155
; CHECK-NEXT: .LBB11_1: // %entry
158156
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
159157
; CHECK-NEXT: st2g sp, [sp], #32
160158
; CHECK-NEXT: sub x8, x8, #32
161159
; CHECK-NEXT: cbnz x8, .LBB11_1
162160
; CHECK-NEXT: // %bb.2: // %entry
163161
; CHECK-NEXT: stg sp, [sp], #16
162+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
163+
; CHECK-NEXT: ret
164+
entry:
165+
%a = alloca i8, i32 272, align 16
166+
call void @llvm.aarch64.settag(i8* %a, i64 272)
167+
ret void
168+
}
169+
170+
define void @stg_alloca18() uwtable {
171+
; CHECK-LABEL: stg_alloca18:
172+
; CHECK: // %bb.0: // %entry
173+
; CHECK-NEXT: sub sp, sp, #288
174+
; CHECK-NEXT: .cfi_def_cfa_offset 288
175+
; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill
176+
; CHECK-NEXT: .cfi_offset w29, -16
177+
; CHECK-NEXT: mov x9, sp
178+
; CHECK-NEXT: mov x8, #256
179+
; CHECK-NEXT: stg x9, [x9], #16
180+
; CHECK-NEXT: .LBB12_1: // %entry
181+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
182+
; CHECK-NEXT: sub x8, x8, #32
183+
; CHECK-NEXT: st2g x9, [x9], #32
184+
; CHECK-NEXT: cbnz x8, .LBB12_1
185+
; CHECK-NEXT: // %bb.2: // %entry
186+
; CHECK-NEXT: add sp, sp, #272
164187
; CHECK-NEXT: .cfi_def_cfa_offset 16
165188
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
166189
; CHECK-NEXT: .cfi_def_cfa_offset 0

0 commit comments

Comments
 (0)