Skip to content

Commit b021464

Browse files
authored
[VPlan] Introduce scalar loop header in plan, remove VPLiveOut. (llvm#109975)
Update VPlan to include the scalar loop header. This allows retiring VPLiveOut, as the remaining live-outs can now be handled by adding operands to the wrapped phis in the scalar loop header. Note that the current version only includes the scalar loop header, no other loop blocks and also does not wrap it in a region block. PR: llvm#109975
1 parent 454abad commit b021464

36 files changed

+586
-392
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,7 @@ class LoopVectorizationPlanner {
506506
// instructions leading from the loop exit instr to the phi need to be
507507
// converted to reductions, with one operand being vector and the other being
508508
// the scalar reduction chain. For other reductions, a select is introduced
509-
// between the phi and live-out recipes when folding the tail.
509+
// between the phi and users outside the vector region when folding the tail.
510510
void adjustRecipesForReductions(VPlanPtr &Plan,
511511
VPRecipeBuilder &RecipeBuilder,
512512
ElementCount MinVF);

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+60-71
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ class InnerLoopVectorizer {
498498
virtual std::pair<BasicBlock *, Value *>
499499
createVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs);
500500

501-
/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
501+
/// Fix the vectorized code, taking care of header phis and more.
502502
void fixVectorizedLoop(VPTransformState &State);
503503

504504
// Return true if any runtime check is added.
@@ -2713,7 +2713,8 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
27132713
| |
27142714
(opt) v <-- edge from middle to exit iff epilogue is not required.
27152715
| [ ] \
2716-
| [ ]_| <-- old scalar loop to handle remainder (scalar epilogue).
2716+
| [ ]_| <-- old scalar loop to handle remainder (scalar epilogue, header
2717+
| | wrapped in VPIRBasicBlock).
27172718
\ |
27182719
\ v
27192720
>[ ] <-- exit block(s). (wrapped in VPIRBasicBlock)
@@ -2956,7 +2957,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29562957
// and there is nothing to fix from vector loop; phis should have incoming
29572958
// from scalar loop only.
29582959
} else {
2959-
// TODO: Check VPLiveOuts to see if IV users need fixing instead of checking
2960+
// TODO: Check in VPlan to see if IV users need fixing instead of checking
29602961
// the cost model.
29612962

29622963
// If we inserted an edge from the middle block to the unique exit block,
@@ -2970,10 +2971,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29702971
IVEndValues[Entry.first], LoopMiddleBlock, State);
29712972
}
29722973

2973-
// Fix live-out phis not already fixed earlier.
2974-
for (const auto &KV : Plan.getLiveOuts())
2975-
KV.second->fixPhi(Plan, State);
2976-
29772974
for (Instruction *PI : PredicatedInstructions)
29782975
sinkScalarOperands(&*PI);
29792976

@@ -8790,6 +8787,41 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
87908787
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
87918788
}
87928789

8790+
/// Create resume phis in the scalar preheader for first-order recurrences and
8791+
/// reductions and update the VPIRInstructions wrapping the original phis in the
8792+
/// scalar header.
8793+
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8794+
auto *ScalarPH = Plan.getScalarPreheader();
8795+
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
8796+
VPBuilder ScalarPHBuilder(ScalarPH);
8797+
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8798+
VPValue *OneVPV = Plan.getOrAddLiveIn(
8799+
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
8800+
for (VPRecipeBase &ScalarPhiR : *Plan.getScalarHeader()) {
8801+
auto *ScalarPhiIRI = cast<VPIRInstruction>(&ScalarPhiR);
8802+
auto *ScalarPhiI = dyn_cast<PHINode>(&ScalarPhiIRI->getInstruction());
8803+
if (!ScalarPhiI)
8804+
break;
8805+
auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder.getRecipe(ScalarPhiI));
8806+
if (!isa<VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe>(VectorPhiR))
8807+
continue;
8808+
// The backedge value provides the value to resume coming out of a loop,
8809+
// which for FORs is a vector whose last element needs to be extracted. The
8810+
// start value provides the value if the loop is bypassed.
8811+
bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
8812+
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
8813+
if (IsFOR)
8814+
ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
8815+
VPInstruction::ExtractFromEnd, {ResumeFromVectorLoop, OneVPV}, {},
8816+
"vector.recur.extract");
8817+
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
8818+
auto *ResumePhiR = ScalarPHBuilder.createNaryOp(
8819+
VPInstruction::ResumePhi,
8820+
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
8821+
ScalarPhiIRI->addOperand(ResumePhiR);
8822+
}
8823+
}
8824+
87938825
// Collect VPIRInstructions for phis in the original exit block that are modeled
87948826
// in VPlan and add the exiting VPValue as operand. Some exiting values are not
87958827
// modeled explicitly yet and won't be included. Those are un-truncated
@@ -8819,8 +8851,7 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlock(
88198851
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
88208852
// Exit values for inductions are computed and updated outside of VPlan and
88218853
// independent of induction recipes.
8822-
// TODO: Compute induction exit values in VPlan, use VPLiveOuts to update
8823-
// live-outs.
8854+
// TODO: Compute induction exit values in VPlan.
88248855
if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
88258856
!cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
88268857
isa<VPWidenPointerInductionRecipe>(V) ||
@@ -8853,7 +8884,8 @@ addUsersInExitBlock(VPlan &Plan,
88538884
// modeling the corresponding LCSSA phis.
88548885
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
88558886
VPValue *V = ExitIRI->getOperand(0);
8856-
// Pass live-in values used by exit phis directly through to the live-out.
8887+
// Pass live-in values used by exit phis directly through to their users in
8888+
// the exit block.
88578889
if (V->isLiveIn())
88588890
continue;
88598891

@@ -8865,39 +8897,17 @@ addUsersInExitBlock(VPlan &Plan,
88658897
}
88668898
}
88678899

8868-
/// Handle live-outs for first order reductions, both in the scalar preheader
8869-
/// and the original exit block:
8870-
/// 1. Feed a resume value for every FOR from the vector loop to the scalar
8871-
/// loop, if middle block branches to scalar preheader, by introducing
8872-
/// ExtractFromEnd and ResumePhi recipes in each, respectively, and a
8873-
/// VPLiveOut which uses the latter and corresponds to the scalar header.
8874-
/// 2. Feed the penultimate value of recurrences to their LCSSA phi users in
8875-
/// the original exit block using a VPLiveOut.
8876-
static void addLiveOutsForFirstOrderRecurrences(
8900+
/// Handle users in the exit block for first-order recurrences in the original
8901+
/// exit block. The penultimate value of recurrences is fed to their LCSSA phi
8902+
/// users in the original exit block using the VPIRInstruction wrapping the
8903+
/// LCSSA phi.
8904+
static void addExitUsersForFirstOrderRecurrences(
88778905
VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix) {
88788906
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
8879-
8880-
// Start by finding out if middle block branches to scalar preheader, which is
8881-
// not a VPIRBasicBlock, unlike Exit block - the other possible successor of
8882-
// middle block.
8883-
// TODO: Should be replaced by
8884-
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8885-
// scalar region is modeled as well.
8907+
auto *ScalarPHVPBB = Plan.getScalarPreheader();
88868908
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
8887-
VPBasicBlock *ScalarPHVPBB = nullptr;
8888-
if (MiddleVPBB->getNumSuccessors() == 2) {
8889-
// Order is strict: first is the exit block, second is the scalar preheader.
8890-
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
8891-
} else if (ExitUsersToFix.empty()) {
8892-
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
8893-
} else {
8894-
llvm_unreachable("unsupported CFG in VPlan");
8895-
}
8896-
88978909
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
88988910
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
8899-
VPValue *OneVPV = Plan.getOrAddLiveIn(
8900-
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
89018911
VPValue *TwoVPV = Plan.getOrAddLiveIn(
89028912
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2));
89038913

@@ -8973,26 +8983,16 @@ static void addLiveOutsForFirstOrderRecurrences(
89738983
// lo = lcssa.phi [s1, scalar.body],
89748984
// [vector.recur.extract.for.phi, middle.block]
89758985
//
8976-
// Extract the resume value and create a new VPLiveOut for it.
8977-
auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd,
8978-
{FOR->getBackedgeValue(), OneVPV},
8979-
{}, "vector.recur.extract");
8980-
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
8981-
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
8982-
"scalar.recur.init");
8983-
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
8984-
Plan.addLiveOut(FORPhi, ResumePhiRecipe);
8985-
89868986
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
89878987
// Extract the penultimate value of the recurrence and use it as operand for
89888988
// the VPIRInstruction modeling the phi.
89898989
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
89908990
if (ExitIRI->getOperand(0) != FOR)
89918991
continue;
8992-
VPValue *Ext = MiddleBuilder.createNaryOp(
8992+
VPValue *PenultimateElement = MiddleBuilder.createNaryOp(
89938993
VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), TwoVPV}, {},
89948994
"vector.recur.extract.for.phi");
8995-
ExitIRI->setOperand(0, Ext);
8995+
ExitIRI->setOperand(0, PenultimateElement);
89968996
ExitUsersToFix.remove(ExitIRI);
89978997
}
89988998
}
@@ -9166,11 +9166,11 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91669166
"VPBasicBlock");
91679167
RecipeBuilder.fixHeaderPhis();
91689168

9169+
addScalarResumePhis(RecipeBuilder, *Plan);
91699170
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
91709171
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9171-
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9172+
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
91729173
addUsersInExitBlock(*Plan, ExitUsersToFix);
9173-
91749174
// ---------------------------------------------------------------------------
91759175
// Transform initial VPlan: Apply previously taken decisions, in order, to
91769176
// bring the VPlan to its final state.
@@ -9192,9 +9192,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91929192
// Replace VPValues for known constant strides guaranteed by predicate scalar
91939193
// evolution.
91949194
auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) {
9195-
auto *R = dyn_cast<VPRecipeBase>(&U);
9196-
if (!R)
9197-
return false;
9195+
auto *R = cast<VPRecipeBase>(&U);
91989196
return R->getParent()->getParent() ||
91999197
R->getParent() ==
92009198
Plan->getVectorLoopRegion()->getSinglePredecessor();
@@ -9291,7 +9289,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
92919289
// instructions leading from the loop exit instr to the phi need to be converted
92929290
// to reductions, with one operand being vector and the other being the scalar
92939291
// reduction chain. For other reductions, a select is introduced between the phi
9294-
// and live-out recipes when folding the tail.
9292+
// and users outside the vector region when folding the tail.
92959293
//
92969294
// A ComputeReductionResult recipe is added to the middle block, also for
92979295
// in-loop reductions which compute their result in-loop, because generating
@@ -9325,8 +9323,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93259323
for (VPUser *U : Cur->users()) {
93269324
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
93279325
if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
9328-
assert(UserRecipe->getParent() == MiddleVPBB &&
9329-
"U must be either in the loop region or the middle block.");
9326+
assert((UserRecipe->getParent() == MiddleVPBB ||
9327+
UserRecipe->getParent() == Plan->getScalarPreheader()) &&
9328+
"U must be either in the loop region, the middle block or the "
9329+
"scalar preheader.");
93309330
continue;
93319331
}
93329332
Worklist.insert(UserRecipe);
@@ -9440,8 +9440,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
94409440

94419441
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
94429442
// If tail is folded by masking, introduce selects between the phi
9443-
// and the live-out instruction of each reduction, at the beginning of the
9444-
// dedicated latch block.
9443+
// and the users outside the vector region of each reduction, at the
9444+
// beginning of the dedicated latch block.
94459445
auto *OrigExitingVPV = PhiR->getBackedgeValue();
94469446
auto *NewExitingVPV = PhiR->getBackedgeValue();
94479447
if (!PhiR->isInLoop() && CM.foldTailByMasking()) {
@@ -9513,17 +9513,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
95139513
});
95149514
FinalReductionResult->insertBefore(*MiddleVPBB, IP);
95159515

9516-
// Order is strict: if there are multiple successors, the first is the exit
9517-
// block, second is the scalar preheader.
9518-
VPBasicBlock *ScalarPHVPBB =
9519-
cast<VPBasicBlock>(MiddleVPBB->getSuccessors().back());
9520-
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
9521-
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
9522-
VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()},
9523-
{}, "bc.merge.rdx");
9524-
auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
9525-
Plan->addLiveOut(RedPhi, ResumePhiRecipe);
9526-
95279516
// Adjust AnyOf reductions; replace the reduction phi for the selected value
95289517
// with a boolean reduction phi node to check if the condition is true in
95299518
// any iteration. The final value is selected by the final

0 commit comments

Comments
 (0)