Skip to content

Commit 2ab19bf

Browse files
committed
[ARM] Adjust the frame pointer when it's needed for SEH unwinding
For functions that require restoring SP from FP (e.g. that need to align the stack, or that have variable sized allocations), the prologue and epilogue previously used to look like this:

    push {r4-r5, r11, lr}
    add r11, sp, #8
    ...
    sub r4, r11, #8
    mov sp, r4
    pop {r4-r5, r11, pc}

This is problematic, because this unwinding operation (restoring sp from r11 - offset) can't be expressed with the SEH unwind opcodes (probably because this unwind procedure doesn't map exactly to individual instructions; note the detour via r4 in the epilogue too).

To make unwinding work, the GPR push is split into two; the first one pushing all other registers, and the second one pushing r11+lr, so that r11 can be set pointing at this spot on the stack:

    push {r4-r5}
    push {r11, lr}
    mov r11, sp
    ...
    mov sp, r11
    pop {r11, lr}
    pop {r4-r5}
    bx lr

For the same setup, MSVC generates code that uses two registers; r11 still pointing at the {r11,lr} pair, but a separate register used for restoring the stack at the end:

    push {r4-r5, r7, r11, lr}
    add r11, sp, #12
    mov r7, sp
    ...
    mov sp, r7
    pop {r4-r5, r7, r11, pc}

For cases with clobbered float/vector registers, they are pushed after the GPRs, before the {r11,lr} pair.

Differential Revision: https://reviews.llvm.org/D125649
1 parent d8e67c1 commit 2ab19bf

File tree

7 files changed

+265
-66
lines changed

7 files changed

+265
-66
lines changed

llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
7373
// GHC set of callee saved regs is empty as all those regs are
7474
// used for passing STG regs around
7575
return CSR_NoRegs_SaveList;
76+
} else if (STI.splitFramePointerPush(*MF)) {
77+
return CSR_Win_SplitFP_SaveList;
7678
} else if (F.getCallingConv() == CallingConv::CFGuard_Check) {
7779
return CSR_Win_AAPCS_CFGuard_Check_SaveList;
7880
} else if (F.getCallingConv() == CallingConv::SwiftTail) {

llvm/lib/Target/ARM/ARMBaseRegisterInfo.h

+27
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,33 @@ static inline bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop) {
7171
}
7272
}
7373

74+
static inline bool isSplitFPArea1Register(unsigned Reg,
75+
bool SplitFramePushPop) {
76+
using namespace ARM;
77+
78+
switch (Reg) {
79+
case R0: case R1: case R2: case R3:
80+
case R4: case R5: case R6: case R7:
81+
case R8: case R9: case R10: case R12:
82+
case SP: case PC:
83+
return true;
84+
default:
85+
return false;
86+
}
87+
}
88+
89+
static inline bool isSplitFPArea2Register(unsigned Reg,
90+
bool SplitFramePushPop) {
91+
using namespace ARM;
92+
93+
switch (Reg) {
94+
case R11: case LR:
95+
return true;
96+
default:
97+
return false;
98+
}
99+
}
100+
74101
static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) {
75102
using namespace ARM;
76103

llvm/lib/Target/ARM/ARMCallingConv.td

+4
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,10 @@ def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
289289
R11, R10, R9, R8,
290290
(sequence "D%u", 15, 8))>;
291291

292+
// Callee-saved register list for the split frame pointer push layout
// (presumably the source of the CSR_Win_SplitFP_SaveList that
// getCalleeSavedRegs returns when splitFramePointerPush() holds).
// The list names r4-r10 first, then d8-d15, and LR/R11 last, so the
// {r11, lr} pair comes after both the other GPRs and the D registers.
def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
293+
(sequence "D%u", 15, 8),
294+
LR, R11)>;
295+
292296
// R8 is used to pass swifterror, remove it from CSR.
293297
def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush,
294298
R8)>;

llvm/lib/Target/ARM/ARMFrameLowering.cpp

+158-66
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,6 @@ static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
300300
break;
301301
case ARM::t2ADDri: // add.w r11, sp, #xx
302302
case ARM::t2ADDri12: // add.w r11, sp, #xx
303-
case ARM::t2SUBri: // sub.w r4, r11, #xx
304303
case ARM::t2MOVTi16: // movt r4, #xx
305304
case ARM::t2MOVi16: // movw r4, #xx
306305
case ARM::tBL: // bl __chkstk
@@ -633,15 +632,23 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
633632
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
634633
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
635634
/// this to produce a conservative estimate that we check in an assert() later.
636-
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) {
635+
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
636+
const MachineFunction &MF) {
637637
// For Thumb1, push.w isn't available, so the first push will always push
638638
// r7 and lr onto the stack first.
639639
if (AFI.isThumb1OnlyFunction())
640640
return -AFI.getArgRegsSaveSize() - (2 * 4);
641641
// This is a conservative estimation: Assume the frame pointer being r7 and
642642
// pc("r15") up to r8 getting spilled before (= 8 registers).
643-
int FPCXTSaveSize = (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
644-
return - FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4);
643+
int MaxRegBytes = 8 * 4;
644+
if (STI.splitFramePointerPush(MF)) {
645+
// Here, r11 can be stored below all of r4-r15 (3 registers more than
646+
// above), plus d8-d15.
647+
MaxRegBytes = 11 * 4 + 8 * 8;
648+
}
649+
int FPCXTSaveSize =
650+
(STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
651+
return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
645652
}
646653

647654
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
@@ -704,42 +711,80 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
704711
}
705712

706713
// Determine spill area sizes.
707-
for (const CalleeSavedInfo &I : CSI) {
708-
Register Reg = I.getReg();
709-
int FI = I.getFrameIdx();
710-
switch (Reg) {
711-
case ARM::R8:
712-
case ARM::R9:
713-
case ARM::R10:
714-
case ARM::R11:
715-
case ARM::R12:
716-
if (STI.splitFramePushPop(MF)) {
714+
if (STI.splitFramePointerPush(MF)) {
715+
for (const CalleeSavedInfo &I : CSI) {
716+
Register Reg = I.getReg();
717+
int FI = I.getFrameIdx();
718+
switch (Reg) {
719+
case ARM::R11:
720+
case ARM::LR:
721+
if (Reg == FramePtr)
722+
FramePtrSpillFI = FI;
717723
GPRCS2Size += 4;
718724
break;
725+
case ARM::R0:
726+
case ARM::R1:
727+
case ARM::R2:
728+
case ARM::R3:
729+
case ARM::R4:
730+
case ARM::R5:
731+
case ARM::R6:
732+
case ARM::R7:
733+
case ARM::R8:
734+
case ARM::R9:
735+
case ARM::R10:
736+
case ARM::R12:
737+
GPRCS1Size += 4;
738+
break;
739+
case ARM::FPCXTNS:
740+
FPCXTSaveSize = 4;
741+
break;
742+
default:
743+
// This is a DPR. Exclude the aligned DPRCS2 spills.
744+
if (Reg == ARM::D8)
745+
D8SpillFI = FI;
746+
if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
747+
DPRCSSize += 8;
748+
}
749+
}
750+
} else {
751+
for (const CalleeSavedInfo &I : CSI) {
752+
Register Reg = I.getReg();
753+
int FI = I.getFrameIdx();
754+
switch (Reg) {
755+
case ARM::R8:
756+
case ARM::R9:
757+
case ARM::R10:
758+
case ARM::R11:
759+
case ARM::R12:
760+
if (STI.splitFramePushPop(MF)) {
761+
GPRCS2Size += 4;
762+
break;
763+
}
764+
LLVM_FALLTHROUGH;
765+
case ARM::R0:
766+
case ARM::R1:
767+
case ARM::R2:
768+
case ARM::R3:
769+
case ARM::R4:
770+
case ARM::R5:
771+
case ARM::R6:
772+
case ARM::R7:
773+
case ARM::LR:
774+
if (Reg == FramePtr)
775+
FramePtrSpillFI = FI;
776+
GPRCS1Size += 4;
777+
break;
778+
case ARM::FPCXTNS:
779+
FPCXTSaveSize = 4;
780+
break;
781+
default:
782+
// This is a DPR. Exclude the aligned DPRCS2 spills.
783+
if (Reg == ARM::D8)
784+
D8SpillFI = FI;
785+
if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
786+
DPRCSSize += 8;
719787
}
720-
LLVM_FALLTHROUGH;
721-
case ARM::R0:
722-
case ARM::R1:
723-
case ARM::R2:
724-
case ARM::R3:
725-
case ARM::R4:
726-
case ARM::R5:
727-
case ARM::R6:
728-
case ARM::R7:
729-
case ARM::LR:
730-
if (Reg == FramePtr)
731-
FramePtrSpillFI = FI;
732-
GPRCS1Size += 4;
733-
break;
734-
case ARM::FPCXTNS:
735-
FPCXTSaveSize = 4;
736-
break;
737-
default:
738-
// This is a DPR. Exclude the aligned DPRCS2 spills.
739-
if (Reg == ARM::D8)
740-
D8SpillFI = FI;
741-
if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
742-
DPRCSSize += 8;
743788
}
744789
}
745790

@@ -774,15 +819,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
774819
unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
775820
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
776821
Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
777-
unsigned DPRGapSize =
778-
(GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) %
779-
DPRAlign.value();
822+
unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
823+
if (!STI.splitFramePointerPush(MF)) {
824+
DPRGapSize += GPRCS2Size;
825+
}
826+
DPRGapSize %= DPRAlign.value();
780827

781-
unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
828+
unsigned DPRCSOffset;
829+
if (STI.splitFramePointerPush(MF)) {
830+
DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
831+
GPRCS2Offset = DPRCSOffset - GPRCS2Size;
832+
} else {
833+
DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
834+
}
782835
int FramePtrOffsetInPush = 0;
783836
if (HasFP) {
784837
int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
785-
assert(getMaxFPOffset(STI, *AFI) <= FPOffset &&
838+
assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
786839
"Max FP estimation is wrong");
787840
FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
788841
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
@@ -793,7 +846,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
793846
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
794847

795848
// Move past area 2.
796-
if (GPRCS2Size > 0) {
849+
if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
797850
GPRCS2Push = LastPush = MBBI++;
798851
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
799852
}
@@ -833,6 +886,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
833886
} else
834887
NumBytes = DPRCSOffset;
835888

889+
if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
890+
GPRCS2Push = LastPush = MBBI++;
891+
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
892+
}
893+
894+
bool NeedsWinCFIStackAlloc = NeedsWinCFI;
895+
if (STI.splitFramePointerPush(MF) && HasFP)
896+
NeedsWinCFIStackAlloc = false;
897+
836898
if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
837899
uint32_t NumWords = NumBytes >> 2;
838900

@@ -888,7 +950,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
888950
.setMIFlags(MachineInstr::FrameSetup)
889951
.add(predOps(ARMCC::AL))
890952
.add(condCodeOp());
891-
if (NeedsWinCFI) {
953+
if (NeedsWinCFIStackAlloc) {
892954
SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
893955
.addImm(NumBytes)
894956
.addImm(/*Wide=*/1)
@@ -927,13 +989,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
927989
// into spill area 1, including the FP in R11. In either case, it
928990
// is in area one and the adjustment needs to take place just after
929991
// that push.
992+
MachineBasicBlock::iterator AfterPush;
930993
if (HasFP) {
931-
MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push);
994+
AfterPush = std::next(GPRCS1Push);
932995
unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
933-
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush,
934-
dl, TII, FramePtr, ARM::SP,
935-
PushSize + FramePtrOffsetInPush,
936-
MachineInstr::FrameSetup);
996+
int FPOffset = PushSize + FramePtrOffsetInPush;
997+
if (STI.splitFramePointerPush(MF)) {
998+
AfterPush = std::next(GPRCS2Push);
999+
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1000+
FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
1001+
} else {
1002+
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1003+
FramePtr, ARM::SP, FPOffset,
1004+
MachineInstr::FrameSetup);
1005+
}
9371006
if (!NeedsWinCFI) {
9381007
if (FramePtrOffsetInPush + PushSize != 0) {
9391008
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
@@ -956,8 +1025,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
9561025
// Emit a SEH opcode indicating the prologue end. The rest of the prologue
9571026
// instructions below don't need to be replayed to unwind the stack.
9581027
if (NeedsWinCFI && MBBI != MBB.begin()) {
959-
insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
960-
BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
1028+
MachineBasicBlock::iterator End = MBBI;
1029+
if (HasFP && STI.splitFramePointerPush(MF))
1030+
End = AfterPush;
1031+
insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);
1032+
BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
9611033
.setMIFlag(MachineInstr::FrameSetup);
9621034
MF.setHasWinCFI(true);
9631035
}
@@ -1483,7 +1555,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
14831555
continue;
14841556
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
14851557
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1486-
STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) {
1558+
STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1559+
!STI.splitFramePointerPush(MF)) {
14871560
Reg = ARM::PC;
14881561
// Fold the return instruction into the LDM.
14891562
DeleteRet = true;
@@ -1847,12 +1920,21 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
18471920
.addImm(-4)
18481921
.add(predOps(ARMCC::AL));
18491922
}
1850-
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
1851-
MachineInstr::FrameSetup);
1852-
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
1853-
MachineInstr::FrameSetup);
1854-
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1855-
NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1923+
if (STI.splitFramePointerPush(MF)) {
1924+
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
1925+
&isSplitFPArea1Register, 0, MachineInstr::FrameSetup);
1926+
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1927+
NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1928+
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
1929+
&isSplitFPArea2Register, 0, MachineInstr::FrameSetup);
1930+
} else {
1931+
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
1932+
0, MachineInstr::FrameSetup);
1933+
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
1934+
0, MachineInstr::FrameSetup);
1935+
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
1936+
NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1937+
}
18561938

18571939
// The code above does not insert spill code for the aligned DPRCS2 registers.
18581940
// The stack realignment code will be inserted between the push instructions
@@ -1880,14 +1962,24 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
18801962
emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
18811963

18821964
unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1883-
unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1965+
unsigned LdrOpc =
1966+
AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
18841967
unsigned FltOpc = ARM::VLDMDIA_UPD;
1885-
emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1886-
NumAlignedDPRCS2Regs);
1887-
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1888-
&isARMArea2Register, 0);
1889-
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1890-
&isARMArea1Register, 0);
1968+
if (STI.splitFramePointerPush(MF)) {
1969+
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1970+
&isSplitFPArea2Register, 0);
1971+
emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1972+
NumAlignedDPRCS2Regs);
1973+
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1974+
&isSplitFPArea1Register, 0);
1975+
} else {
1976+
emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1977+
NumAlignedDPRCS2Regs);
1978+
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1979+
&isARMArea2Register, 0);
1980+
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1981+
&isARMArea1Register, 0);
1982+
}
18911983

18921984
return true;
18931985
}
@@ -2287,7 +2379,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
22872379
//
22882380
// We could do slightly better on Thumb1; in some cases, an sp-relative
22892381
// offset would be legal even though an fp-relative offset is not.
2290-
int MaxFPOffset = getMaxFPOffset(STI, *AFI);
2382+
int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
22912383
bool HasLargeArgumentList =
22922384
HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
22932385

0 commit comments

Comments
 (0)