Skip to content

Commit dedf2c6

Browse files
[AArch64] Refactor allocation of locals and stack realignment (llvm#72028)
Factor out some stack allocation into a separate function. This patch splits out the generic portion of a larger refactoring done as a part of stack clash protection support.

The patch is almost, but not quite, NFC. The only difference should be that, where we have adjacent allocation of stack space for local SVE objects and non-local SVE objects, the order of the `sub sp, ...` and `addvl sp, ...` instructions is reversed, because the allocation is now done with a single call to `emitFrameOffset`, which happens to add/subtract the fixed part before the scalable part, e.g.

    addvl sp, sp, #-2
    sub sp, sp, #16, lsl #12
    sub sp, sp, #16

becomes

    sub sp, sp, #16, lsl #12
    sub sp, sp, #16
    addvl sp, sp, #-2
1 parent 1b781ee commit dedf2c6

11 files changed

+105
-95
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

+59-55
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
296296
static bool produceCompactUnwindFrame(MachineFunction &MF);
297297
static bool needsWinCFI(const MachineFunction &MF);
298298
static StackOffset getSVEStackSize(const MachineFunction &MF);
299+
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
299300

300301
/// Returns true if a homogeneous prolog or epilog code can be emitted
301302
/// for the size optimization. If possible, a frame helper call is injected.
@@ -688,6 +689,44 @@ void AArch64FrameLowering::emitCalleeSavedSVERestores(
688689
emitCalleeSavedRestores(MBB, MBBI, true);
689690
}
690691

692+
void AArch64FrameLowering::allocateStackSpace(
693+
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
694+
bool NeedsRealignment, StackOffset AllocSize, bool NeedsWinCFI,
695+
bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset) const {
696+
697+
if (!AllocSize)
698+
return;
699+
700+
DebugLoc DL;
701+
MachineFunction &MF = *MBB.getParent();
702+
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
703+
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
704+
AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
705+
const MachineFrameInfo &MFI = MF.getFrameInfo();
706+
707+
Register TargetReg =
708+
NeedsRealignment ? findScratchNonCalleeSaveRegister(&MBB) : AArch64::SP;
709+
// SUB Xd/SP, SP, AllocSize
710+
emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
711+
MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
712+
EmitCFI, InitialOffset);
713+
714+
if (NeedsRealignment) {
715+
const int64_t MaxAlign = MFI.getMaxAlign().value();
716+
const uint64_t AndMask = ~(MaxAlign - 1);
717+
// AND SP, Xd, 0b11111...0000
718+
BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
719+
.addReg(TargetReg, RegState::Kill)
720+
.addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
721+
.setMIFlags(MachineInstr::FrameSetup);
722+
AFI.setStackRealigned(true);
723+
724+
// No need for SEH instructions here; if we're realigning the stack,
725+
// we've set a frame pointer and already finished the SEH prologue.
726+
assert(!NeedsWinCFI);
727+
}
728+
}
729+
691730
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
692731
switch (Reg.id()) {
693732
default:
@@ -1774,7 +1813,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
17741813
}
17751814
}
17761815

1777-
StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
1816+
StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
17781817
MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
17791818

17801819
// Process the SVE callee-saves to determine what space needs to be
@@ -1787,67 +1826,32 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
17871826
++MBBI;
17881827
CalleeSavesEnd = MBBI;
17891828

1790-
AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
1791-
AllocateAfter = SVEStackSize - AllocateBefore;
1829+
SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
1830+
SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
17921831
}
17931832

17941833
// Allocate space for the callee saves (if any).
1795-
emitFrameOffset(
1796-
MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
1797-
MachineInstr::FrameSetup, false, false, nullptr,
1798-
EmitAsyncCFI && !HasFP && AllocateBefore,
1799-
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
1834+
StackOffset CFAOffset =
1835+
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
1836+
allocateStackSpace(MBB, CalleeSavesBegin, false, SVECalleeSavesSize, false,
1837+
nullptr, EmitAsyncCFI && !HasFP, CFAOffset);
1838+
CFAOffset += SVECalleeSavesSize;
18001839

18011840
if (EmitAsyncCFI)
18021841
emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
18031842

1804-
// Finally allocate remaining SVE stack space.
1805-
emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
1806-
-AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
1807-
nullptr, EmitAsyncCFI && !HasFP && AllocateAfter,
1808-
AllocateBefore + StackOffset::getFixed(
1809-
(int64_t)MFI.getStackSize() - NumBytes));
1810-
1811-
// Allocate space for the rest of the frame.
1812-
if (NumBytes) {
1813-
unsigned scratchSPReg = AArch64::SP;
1814-
1815-
if (NeedsRealignment) {
1816-
scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
1817-
assert(scratchSPReg != AArch64::NoRegister);
1818-
}
1819-
1820-
// If we're a leaf function, try using the red zone.
1821-
if (!canUseRedZone(MF)) {
1822-
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
1823-
// the correct value here, as NumBytes also includes padding bytes,
1824-
// which shouldn't be counted here.
1825-
emitFrameOffset(
1826-
MBB, MBBI, DL, scratchSPReg, AArch64::SP,
1827-
StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
1828-
false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
1829-
SVEStackSize +
1830-
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
1831-
}
1832-
if (NeedsRealignment) {
1833-
assert(MFI.getMaxAlign() > Align(1));
1834-
assert(scratchSPReg != AArch64::SP);
1835-
1836-
// SUB X9, SP, NumBytes
1837-
// -- X9 is temporary register, so shouldn't contain any live data here,
1838-
// -- free to use. This is already produced by emitFrameOffset above.
1839-
// AND SP, X9, 0b11111...0000
1840-
uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
1841-
1842-
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
1843-
.addReg(scratchSPReg, RegState::Kill)
1844-
.addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
1845-
AFI->setStackRealigned(true);
1846-
1847-
// No need for SEH instructions here; if we're realigning the stack,
1848-
// we've set a frame pointer and already finished the SEH prologue.
1849-
assert(!NeedsWinCFI);
1850-
}
1843+
// Allocate space for the rest of the frame including SVE locals. Align the
1844+
// stack as necessary.
1845+
assert(!(canUseRedZone(MF) && NeedsRealignment) &&
1846+
"Cannot use redzone with stack realignment");
1847+
if (!canUseRedZone(MF)) {
1848+
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
1849+
// the correct value here, as NumBytes also includes padding bytes,
1850+
// which shouldn't be counted here.
1851+
allocateStackSpace(MBB, CalleeSavesEnd, NeedsRealignment,
1852+
SVELocalsSize + StackOffset::getFixed(NumBytes),
1853+
NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
1854+
CFAOffset);
18511855
}
18521856

18531857
// If we need a base pointer, set it up here. It's whatever the value of the

llvm/lib/Target/AArch64/AArch64FrameLowering.h

+5
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,11 @@ class AArch64FrameLowering : public TargetFrameLowering {
150150
MachineBasicBlock::iterator MBBI) const;
151151
void emitCalleeSavedSVERestores(MachineBasicBlock &MBB,
152152
MachineBasicBlock::iterator MBBI) const;
153+
void allocateStackSpace(MachineBasicBlock &MBB,
154+
MachineBasicBlock::iterator MBBI,
155+
bool NeedsRealignment, StackOffset AllocSize,
156+
bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
157+
StackOffset InitialOffset) const;
153158

154159
/// Emit target zero call-used regs.
155160
void emitZeroCallUsedRegs(BitVector RegsToZero,

llvm/test/CodeGen/AArch64/framelayout-sve-basepointer.mir

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
name: hasBasepointer
55
# CHECK-LABEL: name: hasBasepointer
66
# CHECK: bb.0:
7-
# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
8-
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
7+
# CHECK: $sp = frame-setup SUBXri $sp, 16, 0
8+
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
99
# CHECK-NEXT: $x19 = ADDXri $sp, 0, 0
1010
# CHECK: STRXui $x0, $x19, 0
1111
tracksRegLiveness: true

llvm/test/CodeGen/AArch64/framelayout-sve-fixed-width-access.mir

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
; CHECK: // %bb.0: // %entry
88
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
99
; CHECK-NEXT: mov x29, sp
10+
; CHECK-NEXT: sub sp, sp, #2064
1011
; CHECK-NEXT: addvl sp, sp, #-32
1112
; CHECK-NEXT: addvl sp, sp, #-28
12-
; CHECK-NEXT: sub sp, sp, #2064
1313
; CHECK-NEXT: ldr x8, [sp, #2048]
1414
; CHECK-NEXT: addvl sp, sp, #31
1515
; CHECK-NEXT: addvl sp, sp, #29

llvm/test/CodeGen/AArch64/framelayout-sve-scavengingslot.mir

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
name: LateScavengingSlot
55
# CHECK-LABEL: name: LateScavengingSlot
66
# CHECK: bb.0:
7-
# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
8-
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 8, 12
7+
# CHECK: $sp = frame-setup SUBXri $sp, 8, 12
98
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
9+
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
1010
# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 0
1111
# CHECK-NEXT: $[[SCRATCH]] = ADDVL_XXI $fp, -1
1212
# CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 0

0 commit comments

Comments
 (0)