Skip to content

Commit d9ae493

Browse files
committed
[PGO][PGSO] Instrument the code gen / target passes.
Summary: Split off from D67120. Add the profile-guided size optimization (PGSO) instrumentation / queries to the code gen / target passes. This doesn't enable the size optimizations in those passes yet, as they are currently disabled in shouldOptimizeForSize (for non-IR pass queries). This is a second attempt after D71072 was reverted. Reviewers: davidxl Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71149
1 parent 505aa24 commit d9ae493

22 files changed

+290
-61
lines changed

llvm/include/llvm/CodeGen/AsmPrinter.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class GlobalObject;
4848
class GlobalValue;
4949
class GlobalVariable;
5050
class MachineBasicBlock;
51+
class MachineBlockFrequencyInfo;
5152
class MachineConstantPoolValue;
5253
class MachineDominatorTree;
5354
class MachineFunction;
@@ -69,6 +70,7 @@ class MCSymbol;
6970
class MCTargetOptions;
7071
class MDNode;
7172
class Module;
73+
class ProfileSummaryInfo;
7274
class raw_ostream;
7375
class RemarkStreamer;
7476
class StackMaps;
@@ -108,6 +110,10 @@ class AsmPrinter : public MachineFunctionPass {
108110
/// Optimization remark emitter.
109111
MachineOptimizationRemarkEmitter *ORE;
110112

113+
/// Machine block frequency info for the current function. Assigned in
/// SetupMachineFunction(); null when no profile summary is available.
MachineBlockFrequencyInfo *MBFI;
114+
115+
/// Profile summary info, obtained from ProfileSummaryInfoWrapperPass in
/// SetupMachineFunction().
ProfileSummaryInfo *PSI;
116+
111117
/// The symbol for the current function. This is recalculated at the beginning
112118
/// of each call to runOnMachineFunction().
113119
MCSymbol *CurrentFnSym = nullptr;

llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,10 @@ class MachineOptimizationRemarkEmitter {
182182
}
183183
}
184184

185+
/// Returns the machine block frequency info held by this emitter.
/// NOTE(review): the AsmPrinter caller null-checks the result before using
/// it, so this may be null when the emitter did not compute it -- confirm.
MachineBlockFrequencyInfo *getBFI() {
186+
return MBFI;
187+
}
188+
185189
private:
186190
MachineFunction &MF;
187191

llvm/include/llvm/CodeGen/TailDuplicator.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,13 @@
2525
namespace llvm {
2626

2727
class MachineBasicBlock;
28+
class MachineBlockFrequencyInfo;
2829
class MachineBranchProbabilityInfo;
2930
class MachineFunction;
3031
class MachineInstr;
3132
class MachineModuleInfo;
3233
class MachineRegisterInfo;
34+
class ProfileSummaryInfo;
3335
class TargetRegisterInfo;
3436

3537
/// Utility class to perform tail duplication.
@@ -40,6 +42,8 @@ class TailDuplicator {
4042
const MachineModuleInfo *MMI;
4143
MachineRegisterInfo *MRI;
4244
MachineFunction *MF;
45+
const MachineBlockFrequencyInfo *MBFI;
46+
ProfileSummaryInfo *PSI;
4347
bool PreRegAlloc;
4448
bool LayoutMode;
4549
unsigned TailDupSize;
@@ -65,6 +69,8 @@ class TailDuplicator {
6569
/// default implies using the command line value TailDupSize.
6670
void initMF(MachineFunction &MF, bool PreRegAlloc,
6771
const MachineBranchProbabilityInfo *MBPI,
72+
const MachineBlockFrequencyInfo *MBFI,
73+
ProfileSummaryInfo *PSI,
6874
bool LayoutMode, unsigned TailDupSize = 0);
6975

7076
bool tailDuplicateBlocks();

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,16 @@
3131
#include "llvm/Analysis/ConstantFolding.h"
3232
#include "llvm/Analysis/EHPersonalities.h"
3333
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
34+
#include "llvm/Analysis/ProfileSummaryInfo.h"
3435
#include "llvm/BinaryFormat/COFF.h"
3536
#include "llvm/BinaryFormat/Dwarf.h"
3637
#include "llvm/BinaryFormat/ELF.h"
3738
#include "llvm/CodeGen/GCMetadata.h"
3839
#include "llvm/CodeGen/GCMetadataPrinter.h"
3940
#include "llvm/CodeGen/GCStrategy.h"
41+
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
4042
#include "llvm/CodeGen/MachineBasicBlock.h"
43+
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
4144
#include "llvm/CodeGen/MachineConstantPool.h"
4245
#include "llvm/CodeGen/MachineDominators.h"
4346
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -52,6 +55,7 @@
5255
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
5356
#include "llvm/CodeGen/MachineOperand.h"
5457
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
58+
#include "llvm/CodeGen/MachineSizeOpts.h"
5559
#include "llvm/CodeGen/StackMaps.h"
5660
#include "llvm/CodeGen/TargetFrameLowering.h"
5761
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -248,6 +252,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
248252
AU.addRequired<MachineModuleInfoWrapperPass>();
249253
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
250254
AU.addRequired<GCModuleInfo>();
255+
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
256+
AU.addRequired<ProfileSummaryInfoWrapperPass>();
251257
}
252258

253259
bool AsmPrinter::doInitialization(Module &M) {
@@ -1684,6 +1690,13 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
16841690
}
16851691

16861692
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
1693+
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1694+
MBFI = (PSI && PSI->hasProfileSummary()) ?
1695+
// ORE conditionally computes MBFI. If available, use it, otherwise
1696+
// request it.
1697+
(ORE->getBFI() ? ORE->getBFI() :
1698+
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()) :
1699+
nullptr;
16871700
}
16881701

16891702
namespace {
@@ -2913,8 +2926,10 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
29132926
void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
29142927
MCCodePaddingContext &Context) const {
29152928
assert(MF != nullptr && "Machine function must be valid");
2929+
bool OptForSize = MF->getFunction().hasOptSize() ||
2930+
llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
29162931
Context.IsPaddingActive = !MF->hasInlineAsm() &&
2917-
!MF->getFunction().hasOptSize() &&
2932+
!OptForSize &&
29182933
TM.getOptLevel() != CodeGenOpt::None;
29192934
Context.IsBasicBlockReachableViaFallthrough =
29202935
std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) !=

llvm/lib/CodeGen/BranchFolding.cpp

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/ADT/SmallSet.h"
2525
#include "llvm/ADT/SmallVector.h"
2626
#include "llvm/ADT/Statistic.h"
27+
#include "llvm/Analysis/ProfileSummaryInfo.h"
2728
#include "llvm/CodeGen/Analysis.h"
2829
#include "llvm/CodeGen/LivePhysRegs.h"
2930
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -38,6 +39,7 @@
3839
#include "llvm/CodeGen/MachineModuleInfo.h"
3940
#include "llvm/CodeGen/MachineOperand.h"
4041
#include "llvm/CodeGen/MachineRegisterInfo.h"
42+
#include "llvm/CodeGen/MachineSizeOpts.h"
4143
#include "llvm/CodeGen/TargetInstrInfo.h"
4244
#include "llvm/CodeGen/TargetOpcodes.h"
4345
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -103,6 +105,7 @@ namespace {
103105
/// Declares the analyses this pass requires before delegating to
/// MachineFunctionPass for the common requirements.
void getAnalysisUsage(AnalysisUsage &AU) const override {
104106
AU.addRequired<MachineBlockFrequencyInfo>();
105107
AU.addRequired<MachineBranchProbabilityInfo>();
108+
// Profile summary is needed for the profile-guided size queries.
AU.addRequired<ProfileSummaryInfoWrapperPass>();
106109
AU.addRequired<TargetPassConfig>();
107110
MachineFunctionPass::getAnalysisUsage(AU);
108111
}
@@ -129,7 +132,8 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
129132
BranchFolder::MBFIWrapper MBBFreqInfo(
130133
getAnalysis<MachineBlockFrequencyInfo>());
131134
BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
132-
getAnalysis<MachineBranchProbabilityInfo>());
135+
getAnalysis<MachineBranchProbabilityInfo>(),
136+
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
133137
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
134138
return Folder.OptimizeFunction(
135139
MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(),
@@ -139,9 +143,10 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
139143
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
140144
MBFIWrapper &FreqInfo,
141145
const MachineBranchProbabilityInfo &ProbInfo,
146+
ProfileSummaryInfo *PSI,
142147
unsigned MinTailLength)
143148
: EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
144-
MBBFreqInfo(FreqInfo), MBPI(ProbInfo) {
149+
MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) {
145150
if (MinCommonTailLength == 0)
146151
MinCommonTailLength = TailMergeSize;
147152
switch (FlagEnableTailMerge) {
@@ -585,7 +590,9 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
585590
MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
586591
MachineBasicBlock *PredBB,
587592
DenseMap<const MachineBasicBlock *, int> &EHScopeMembership,
588-
bool AfterPlacement) {
593+
bool AfterPlacement,
594+
BranchFolder::MBFIWrapper &MBBFreqInfo,
595+
ProfileSummaryInfo *PSI) {
589596
// It is never profitable to tail-merge blocks from two different EH scopes.
590597
if (!EHScopeMembership.empty()) {
591598
auto EHScope1 = EHScopeMembership.find(MBB1);
@@ -682,7 +689,11 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
682689
// branch instruction, which is likely to be smaller than the 2
683690
// instructions that would be deleted in the merge.
684691
MachineFunction *MF = MBB1->getParent();
685-
return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
692+
bool OptForSize =
693+
MF->getFunction().hasOptSize() ||
694+
(llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) &&
695+
llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI()));
696+
return EffectiveTailLen >= 2 && OptForSize &&
686697
(FullBlockTail1 || FullBlockTail2);
687698
}
688699

@@ -704,7 +715,7 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
704715
CommonTailLen, TrialBBI1, TrialBBI2,
705716
SuccBB, PredBB,
706717
EHScopeMembership,
707-
AfterBlockPlacement)) {
718+
AfterBlockPlacement, MBBFreqInfo, PSI)) {
708719
if (CommonTailLen > maxCommonTailLength) {
709720
SameTails.clear();
710721
maxCommonTailLength = CommonTailLen;
@@ -1534,8 +1545,10 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
15341545
}
15351546
}
15361547

1537-
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
1538-
MF.getFunction().hasOptSize()) {
1548+
bool OptForSize =
1549+
MF.getFunction().hasOptSize() ||
1550+
llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI());
1551+
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
15391552
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
15401553
// direction, thereby defeating careful block placement and regressing
15411554
// performance. Therefore, only consider this when optimizing for size.

llvm/lib/CodeGen/BranchFolding.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class MachineFunction;
2727
class MachineLoopInfo;
2828
class MachineModuleInfo;
2929
class MachineRegisterInfo;
30+
class ProfileSummaryInfo;
3031
class raw_ostream;
3132
class TargetInstrInfo;
3233
class TargetRegisterInfo;
@@ -39,6 +40,7 @@ class TargetRegisterInfo;
3940
bool CommonHoist,
4041
MBFIWrapper &FreqInfo,
4142
const MachineBranchProbabilityInfo &ProbInfo,
43+
ProfileSummaryInfo *PSI,
4244
// Min tail length to merge. Defaults to commandline
4345
// flag. Ignored for optsize.
4446
unsigned MinTailLength = 0);
@@ -145,6 +147,7 @@ class TargetRegisterInfo;
145147
const BlockFrequency Freq) const;
146148
void view(const Twine &Name, bool isSimple = true);
147149
uint64_t getEntryFreq() const;
150+
/// Exposes the wrapped MachineBlockFrequencyInfo, e.g. so it can be passed
/// to llvm::shouldOptimizeForSize.
const MachineBlockFrequencyInfo &getMBFI() { return MBFI; }
148151

149152
private:
150153
const MachineBlockFrequencyInfo &MBFI;
@@ -154,6 +157,7 @@ class TargetRegisterInfo;
154157
private:
155158
MBFIWrapper &MBBFreqInfo;
156159
const MachineBranchProbabilityInfo &MBPI;
160+
ProfileSummaryInfo *PSI;
157161

158162
bool TailMergeBlocks(MachineFunction &MF);
159163
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,

0 commit comments

Comments
 (0)