@@ -300,7 +300,6 @@ static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
300
300
break ;
301
301
case ARM::t2ADDri: // add.w r11, sp, #xx
302
302
case ARM::t2ADDri12: // add.w r11, sp, #xx
303
- case ARM::t2SUBri: // sub.w r4, r11, #xx
304
303
case ARM::t2MOVTi16: // movt r4, #xx
305
304
case ARM::t2MOVi16: // movw r4, #xx
306
305
case ARM::tBL: // bl __chkstk
@@ -633,15 +632,23 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
633
632
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
634
633
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
635
634
/// this to produce a conservative estimate that we check in an assert() later.
// Hunk summary: getMaxFPOffset() gains a `const MachineFunction &MF`
// parameter. The new body passes MF to STI.splitFramePointerPush() and, when
// that returns true, uses a larger register-save estimate (MaxRegBytes).
// The returned value is a negated byte count (see the return statements).
636
- static int getMaxFPOffset (const ARMSubtarget &STI, const ARMFunctionInfo &AFI) {
635
+ static int getMaxFPOffset (const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
636
+ const MachineFunction &MF) {
637
637
// For Thumb1, push.w isn't available, so the first push will always push
638
638
// r7 and lr onto the stack first.
639
639
if (AFI.isThumb1OnlyFunction ())
640
640
return -AFI.getArgRegsSaveSize () - (2 * 4 ); // arg-register save size plus two 4-byte slots (r7, lr), negated
641
641
// This is a conservative estimation: Assume the frame pointer being r7 and
642
642
// pc("r15") up to r8 getting spilled before (= 8 registers).
643
- int FPCXTSaveSize = (STI.hasV8_1MMainlineOps () && AFI.isCmseNSEntryFunction ()) ? 4 : 0 ;
644
- return - FPCXTSaveSize - AFI.getArgRegsSaveSize () - (8 * 4 );
643
+ int MaxRegBytes = 8 * 4 ; // default estimate: 8 registers x 4 bytes each
644
+ if (STI.splitFramePointerPush (MF)) {
645
+ // Here, r11 can be stored below all of r4-r15 (3 registers more than
646
+ // above), plus d8-d15.
647
+ MaxRegBytes = 11 * 4 + 8 * 8 ; // 11 GPR slots x 4 bytes + 8 D-register slots x 8 bytes
648
+ }
649
+ int FPCXTSaveSize =
650
+ (STI.hasV8_1MMainlineOps () && AFI.isCmseNSEntryFunction ()) ? 4 : 0 ; // 4 bytes only when both predicates hold; presumably the FPCXTNS slot -- TODO confirm
651
+ return -FPCXTSaveSize - AFI.getArgRegsSaveSize () - MaxRegBytes; // negated sum of the three byte counts above
645
652
}
646
653
647
654
void ARMFrameLowering::emitPrologue (MachineFunction &MF,
@@ -704,42 +711,80 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
704
711
}
705
712
706
713
// Determine spill area sizes.
707
- for (const CalleeSavedInfo &I : CSI) {
708
- Register Reg = I.getReg ();
709
- int FI = I.getFrameIdx ();
710
- switch (Reg) {
711
- case ARM::R8:
712
- case ARM::R9:
713
- case ARM::R10:
714
- case ARM::R11:
715
- case ARM::R12:
716
- if (STI.splitFramePushPop (MF)) {
714
+ if (STI.splitFramePointerPush (MF)) {
715
+ for (const CalleeSavedInfo &I : CSI) {
716
+ Register Reg = I.getReg ();
717
+ int FI = I.getFrameIdx ();
718
+ switch (Reg) {
719
+ case ARM::R11:
720
+ case ARM::LR:
721
+ if (Reg == FramePtr)
722
+ FramePtrSpillFI = FI;
717
723
GPRCS2Size += 4 ;
718
724
break ;
725
+ case ARM::R0:
726
+ case ARM::R1:
727
+ case ARM::R2:
728
+ case ARM::R3:
729
+ case ARM::R4:
730
+ case ARM::R5:
731
+ case ARM::R6:
732
+ case ARM::R7:
733
+ case ARM::R8:
734
+ case ARM::R9:
735
+ case ARM::R10:
736
+ case ARM::R12:
737
+ GPRCS1Size += 4 ;
738
+ break ;
739
+ case ARM::FPCXTNS:
740
+ FPCXTSaveSize = 4 ;
741
+ break ;
742
+ default :
743
+ // This is a DPR. Exclude the aligned DPRCS2 spills.
744
+ if (Reg == ARM::D8)
745
+ D8SpillFI = FI;
746
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs ())
747
+ DPRCSSize += 8 ;
748
+ }
749
+ }
750
+ } else {
751
+ for (const CalleeSavedInfo &I : CSI) {
752
+ Register Reg = I.getReg ();
753
+ int FI = I.getFrameIdx ();
754
+ switch (Reg) {
755
+ case ARM::R8:
756
+ case ARM::R9:
757
+ case ARM::R10:
758
+ case ARM::R11:
759
+ case ARM::R12:
760
+ if (STI.splitFramePushPop (MF)) {
761
+ GPRCS2Size += 4 ;
762
+ break ;
763
+ }
764
+ LLVM_FALLTHROUGH;
765
+ case ARM::R0:
766
+ case ARM::R1:
767
+ case ARM::R2:
768
+ case ARM::R3:
769
+ case ARM::R4:
770
+ case ARM::R5:
771
+ case ARM::R6:
772
+ case ARM::R7:
773
+ case ARM::LR:
774
+ if (Reg == FramePtr)
775
+ FramePtrSpillFI = FI;
776
+ GPRCS1Size += 4 ;
777
+ break ;
778
+ case ARM::FPCXTNS:
779
+ FPCXTSaveSize = 4 ;
780
+ break ;
781
+ default :
782
+ // This is a DPR. Exclude the aligned DPRCS2 spills.
783
+ if (Reg == ARM::D8)
784
+ D8SpillFI = FI;
785
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs ())
786
+ DPRCSSize += 8 ;
719
787
}
720
- LLVM_FALLTHROUGH;
721
- case ARM::R0:
722
- case ARM::R1:
723
- case ARM::R2:
724
- case ARM::R3:
725
- case ARM::R4:
726
- case ARM::R5:
727
- case ARM::R6:
728
- case ARM::R7:
729
- case ARM::LR:
730
- if (Reg == FramePtr)
731
- FramePtrSpillFI = FI;
732
- GPRCS1Size += 4 ;
733
- break ;
734
- case ARM::FPCXTNS:
735
- FPCXTSaveSize = 4 ;
736
- break ;
737
- default :
738
- // This is a DPR. Exclude the aligned DPRCS2 spills.
739
- if (Reg == ARM::D8)
740
- D8SpillFI = FI;
741
- if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs ())
742
- DPRCSSize += 8 ;
743
788
}
744
789
}
745
790
@@ -774,15 +819,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
774
819
unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
775
820
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
776
821
Align DPRAlign = DPRCSSize ? std::min (Align (8 ), Alignment) : Align (4 );
777
- unsigned DPRGapSize =
778
- (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) %
779
- DPRAlign.value ();
822
+ unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
823
+ if (!STI.splitFramePointerPush (MF)) {
824
+ DPRGapSize += GPRCS2Size;
825
+ }
826
+ DPRGapSize %= DPRAlign.value ();
780
827
781
- unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
828
+ unsigned DPRCSOffset;
829
+ if (STI.splitFramePointerPush (MF)) {
830
+ DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
831
+ GPRCS2Offset = DPRCSOffset - GPRCS2Size;
832
+ } else {
833
+ DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
834
+ }
782
835
int FramePtrOffsetInPush = 0 ;
783
836
if (HasFP) {
784
837
int FPOffset = MFI.getObjectOffset (FramePtrSpillFI);
785
- assert (getMaxFPOffset (STI, *AFI) <= FPOffset &&
838
+ assert (getMaxFPOffset (STI, *AFI, MF ) <= FPOffset &&
786
839
" Max FP estimation is wrong" );
787
840
FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
788
841
AFI->setFramePtrSpillOffset (MFI.getObjectOffset (FramePtrSpillFI) +
@@ -793,7 +846,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
793
846
AFI->setDPRCalleeSavedAreaOffset (DPRCSOffset);
794
847
795
848
// Move past area 2.
796
- if (GPRCS2Size > 0 ) {
849
+ if (GPRCS2Size > 0 && !STI. splitFramePointerPush (MF) ) {
797
850
GPRCS2Push = LastPush = MBBI++;
798
851
DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size);
799
852
}
@@ -833,6 +886,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
833
886
} else
834
887
NumBytes = DPRCSOffset;
835
888
889
+ if (GPRCS2Size > 0 && STI.splitFramePointerPush (MF)) {
890
+ GPRCS2Push = LastPush = MBBI++;
891
+ DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size);
892
+ }
893
+
894
+ bool NeedsWinCFIStackAlloc = NeedsWinCFI;
895
+ if (STI.splitFramePointerPush (MF) && HasFP)
896
+ NeedsWinCFIStackAlloc = false ;
897
+
836
898
if (STI.isTargetWindows () && WindowsRequiresStackProbe (MF, NumBytes)) {
837
899
uint32_t NumWords = NumBytes >> 2 ;
838
900
@@ -888,7 +950,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
888
950
.setMIFlags (MachineInstr::FrameSetup)
889
951
.add (predOps (ARMCC::AL))
890
952
.add (condCodeOp ());
891
- if (NeedsWinCFI ) {
953
+ if (NeedsWinCFIStackAlloc ) {
892
954
SEH = BuildMI (MF, dl, TII.get (ARM::SEH_StackAlloc))
893
955
.addImm (NumBytes)
894
956
.addImm (/* Wide=*/ 1 )
@@ -927,13 +989,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
927
989
// into spill area 1, including the FP in R11. In either case, it
928
990
// is in area one and the adjustment needs to take place just after
929
991
// that push.
992
+ MachineBasicBlock::iterator AfterPush;
930
993
if (HasFP) {
931
- MachineBasicBlock::iterator AfterPush = std::next (GPRCS1Push);
994
+ AfterPush = std::next (GPRCS1Push);
932
995
unsigned PushSize = sizeOfSPAdjustment (*GPRCS1Push);
933
- emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush,
934
- dl, TII, FramePtr, ARM::SP,
935
- PushSize + FramePtrOffsetInPush,
936
- MachineInstr::FrameSetup);
996
+ int FPOffset = PushSize + FramePtrOffsetInPush;
997
+ if (STI.splitFramePointerPush (MF)) {
998
+ AfterPush = std::next (GPRCS2Push);
999
+ emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1000
+ FramePtr, ARM::SP, 0 , MachineInstr::FrameSetup);
1001
+ } else {
1002
+ emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1003
+ FramePtr, ARM::SP, FPOffset,
1004
+ MachineInstr::FrameSetup);
1005
+ }
937
1006
if (!NeedsWinCFI) {
938
1007
if (FramePtrOffsetInPush + PushSize != 0 ) {
939
1008
unsigned CFIIndex = MF.addFrameInst (MCCFIInstruction::cfiDefCfa (
@@ -956,8 +1025,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
956
1025
// Emit a SEH opcode indicating the prologue end. The rest of the prologue
957
1026
// instructions below don't need to be replayed to unwind the stack.
958
1027
if (NeedsWinCFI && MBBI != MBB.begin ()) {
959
- insertSEHRange (MBB, {}, MBBI, TII, MachineInstr::FrameSetup);
960
- BuildMI (MBB, MBBI, dl, TII.get (ARM::SEH_PrologEnd))
1028
+ MachineBasicBlock::iterator End = MBBI;
1029
+ if (HasFP && STI.splitFramePointerPush (MF))
1030
+ End = AfterPush;
1031
+ insertSEHRange (MBB, {}, End, TII, MachineInstr::FrameSetup);
1032
+ BuildMI (MBB, End, dl, TII.get (ARM::SEH_PrologEnd))
961
1033
.setMIFlag (MachineInstr::FrameSetup);
962
1034
MF.setHasWinCFI (true );
963
1035
}
@@ -1483,7 +1555,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1483
1555
continue ;
1484
1556
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1485
1557
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore () == 0 &&
1486
- STI.hasV5TOps () && MBB.succ_empty () && !hasPAC) {
1558
+ STI.hasV5TOps () && MBB.succ_empty () && !hasPAC &&
1559
+ !STI.splitFramePointerPush (MF)) {
1487
1560
Reg = ARM::PC;
1488
1561
// Fold the return instruction into the LDM.
1489
1562
DeleteRet = true ;
@@ -1847,12 +1920,21 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
1847
1920
.addImm (-4 )
1848
1921
.add (predOps (ARMCC::AL));
1849
1922
}
1850
- emitPushInst (MBB, MI, CSI, PushOpc, PushOneOpc, false , &isARMArea1Register, 0 ,
1851
- MachineInstr::FrameSetup);
1852
- emitPushInst (MBB, MI, CSI, PushOpc, PushOneOpc, false , &isARMArea2Register, 0 ,
1853
- MachineInstr::FrameSetup);
1854
- emitPushInst (MBB, MI, CSI, FltOpc, 0 , true , &isARMArea3Register,
1855
- NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1923
+ if (STI.splitFramePointerPush (MF)) {
1924
+ emitPushInst (MBB, MI, CSI, PushOpc, PushOneOpc, false ,
1925
+ &isSplitFPArea1Register, 0 , MachineInstr::FrameSetup);
1926
+ emitPushInst (MBB, MI, CSI, FltOpc, 0 , true , &isARMArea3Register,
1927
+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1928
+ emitPushInst (MBB, MI, CSI, PushOpc, PushOneOpc, false ,
1929
+ &isSplitFPArea2Register, 0 , MachineInstr::FrameSetup);
1930
+ } else {
1931
+ emitPushInst (MBB, MI, CSI, PushOpc, PushOneOpc, false , &isARMArea1Register,
1932
+ 0 , MachineInstr::FrameSetup);
1933
+ emitPushInst (MBB, MI, CSI, PushOpc, PushOneOpc, false , &isARMArea2Register,
1934
+ 0 , MachineInstr::FrameSetup);
1935
+ emitPushInst (MBB, MI, CSI, FltOpc, 0 , true , &isARMArea3Register,
1936
+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
1937
+ }
1856
1938
1857
1939
// The code above does not insert spill code for the aligned DPRCS2 registers.
1858
1940
// The stack realignment code will be inserted between the push instructions
@@ -1880,14 +1962,24 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
1880
1962
emitAlignedDPRCS2Restores (MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1881
1963
1882
1964
unsigned PopOpc = AFI->isThumbFunction () ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1883
- unsigned LdrOpc = AFI->isThumbFunction () ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1965
+ unsigned LdrOpc =
1966
+ AFI->isThumbFunction () ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
1884
1967
unsigned FltOpc = ARM::VLDMDIA_UPD;
1885
- emitPopInst (MBB, MI, CSI, FltOpc, 0 , isVarArg, true , &isARMArea3Register,
1886
- NumAlignedDPRCS2Regs);
1887
- emitPopInst (MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false ,
1888
- &isARMArea2Register, 0 );
1889
- emitPopInst (MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false ,
1890
- &isARMArea1Register, 0 );
1968
+ if (STI.splitFramePointerPush (MF)) {
1969
+ emitPopInst (MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false ,
1970
+ &isSplitFPArea2Register, 0 );
1971
+ emitPopInst (MBB, MI, CSI, FltOpc, 0 , isVarArg, true , &isARMArea3Register,
1972
+ NumAlignedDPRCS2Regs);
1973
+ emitPopInst (MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false ,
1974
+ &isSplitFPArea1Register, 0 );
1975
+ } else {
1976
+ emitPopInst (MBB, MI, CSI, FltOpc, 0 , isVarArg, true , &isARMArea3Register,
1977
+ NumAlignedDPRCS2Regs);
1978
+ emitPopInst (MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false ,
1979
+ &isARMArea2Register, 0 );
1980
+ emitPopInst (MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false ,
1981
+ &isARMArea1Register, 0 );
1982
+ }
1891
1983
1892
1984
return true ;
1893
1985
}
@@ -2287,7 +2379,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
2287
2379
//
2288
2380
// We could do slightly better on Thumb1; in some cases, an sp-relative
2289
2381
// offset would be legal even though an fp-relative offset is not.
2290
- int MaxFPOffset = getMaxFPOffset (STI, *AFI);
2382
+ int MaxFPOffset = getMaxFPOffset (STI, *AFI, MF );
2291
2383
bool HasLargeArgumentList =
2292
2384
HasFP && (MaxFixedOffset - MaxFPOffset) > (int )EstimatedRSFixedSizeLimit;
2293
2385
0 commit comments