@@ -543,11 +543,6 @@ class InnerLoopVectorizer {
543
543
protected:
544
544
friend class LoopVectorizationPlanner ;
545
545
546
- // / Set up the values of the IVs correctly when exiting the vector loop.
547
- virtual void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
548
- Value *VectorTripCount, BasicBlock *MiddleBlock,
549
- VPTransformState &State);
550
-
551
546
// / Iteratively sink the scalarized operands of a predicated instruction into
552
547
// / the block that was created for it.
553
548
void sinkScalarOperands (Instruction *PredInst);
@@ -785,10 +780,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
785
780
BasicBlock *emitIterationCountCheck (BasicBlock *Bypass, bool ForEpilogue);
786
781
void printDebugTracesAtStart () override ;
787
782
void printDebugTracesAtEnd () override ;
788
-
789
- void fixupIVUsers (PHINode *OrigPhi, const InductionDescriptor &II,
790
- Value *VectorTripCount, BasicBlock *MiddleBlock,
791
- VPTransformState &State) override {};
792
783
};
793
784
794
785
// A specialized derived class of inner loop vectorizer that performs
@@ -2782,97 +2773,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
2782
2773
return LoopVectorPreHeader;
2783
2774
}
2784
2775
2785
- // Fix up external users of the induction variable. At this point, we are
2786
- // in LCSSA form, with all external PHIs that use the IV having one input value,
2787
- // coming from the remainder loop. We need those PHIs to also have a correct
2788
- // value for the IV when arriving directly from the middle block.
2789
- void InnerLoopVectorizer::fixupIVUsers (PHINode *OrigPhi,
2790
- const InductionDescriptor &II,
2791
- Value *VectorTripCount,
2792
- BasicBlock *MiddleBlock,
2793
- VPTransformState &State) {
2794
- // There are two kinds of external IV usages - those that use the value
2795
- // computed in the last iteration (the PHI) and those that use the penultimate
2796
- // value (the value that feeds into the phi from the loop latch).
2797
- // We allow both, but they, obviously, have different values.
2798
-
2799
- DenseMap<Value *, Value *> MissingVals;
2800
-
2801
- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock (
2802
- OrigLoop->getLoopPreheader ()))
2803
- ->getIncomingValueForBlock (MiddleBlock);
2804
-
2805
- // An external user of the last iteration's value should see the value that
2806
- // the remainder loop uses to initialize its own IV.
2807
- Value *PostInc = OrigPhi->getIncomingValueForBlock (OrigLoop->getLoopLatch ());
2808
- for (User *U : PostInc->users ()) {
2809
- Instruction *UI = cast<Instruction>(U);
2810
- if (!OrigLoop->contains (UI)) {
2811
- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2812
- MissingVals[UI] = EndValue;
2813
- }
2814
- }
2815
-
2816
- // An external user of the penultimate value need to see EndValue - Step.
2817
- // The simplest way to get this is to recompute it from the constituent SCEVs,
2818
- // that is Start + (Step * (CRD - 1)).
2819
- for (User *U : OrigPhi->users ()) {
2820
- auto *UI = cast<Instruction>(U);
2821
- if (!OrigLoop->contains (UI)) {
2822
- assert (isa<PHINode>(UI) && " Expected LCSSA form" );
2823
- IRBuilder<> B (MiddleBlock->getTerminator ());
2824
-
2825
- // Fast-math-flags propagate from the original induction instruction.
2826
- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
2827
- B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
2828
-
2829
- VPValue *StepVPV = Plan.getSCEVExpansion (II.getStep ());
2830
- assert (StepVPV && " step must have been expanded during VPlan execution" );
2831
- Value *Step = StepVPV->isLiveIn () ? StepVPV->getLiveInIRValue ()
2832
- : State.get (StepVPV, VPLane (0 ));
2833
- Value *Escape = nullptr ;
2834
- if (EndValue->getType ()->isIntegerTy ())
2835
- Escape = B.CreateSub (EndValue, Step);
2836
- else if (EndValue->getType ()->isPointerTy ())
2837
- Escape = B.CreatePtrAdd (EndValue, B.CreateNeg (Step));
2838
- else {
2839
- assert (EndValue->getType ()->isFloatingPointTy () &&
2840
- " Unexpected induction type" );
2841
- Escape = B.CreateBinOp (II.getInductionBinOp ()->getOpcode () ==
2842
- Instruction::FAdd
2843
- ? Instruction::FSub
2844
- : Instruction::FAdd,
2845
- EndValue, Step);
2846
- }
2847
- Escape->setName (" ind.escape" );
2848
- MissingVals[UI] = Escape;
2849
- }
2850
- }
2851
-
2852
- assert ((MissingVals.empty () ||
2853
- all_of (MissingVals,
2854
- [MiddleBlock, this ](const std::pair<Value *, Value *> &P) {
2855
- return all_of (
2856
- predecessors (cast<Instruction>(P.first )->getParent ()),
2857
- [MiddleBlock, this ](BasicBlock *Pred) {
2858
- return Pred == MiddleBlock ||
2859
- Pred == OrigLoop->getLoopLatch ();
2860
- });
2861
- })) &&
2862
- " Expected escaping values from latch/middle.block only" );
2863
-
2864
- for (auto &I : MissingVals) {
2865
- PHINode *PHI = cast<PHINode>(I.first );
2866
- // One corner case we have to handle is two IVs "chasing" each-other,
2867
- // that is %IV2 = phi [...], [ %IV1, %latch ]
2868
- // In this case, if IV1 has an external use, we need to avoid adding both
2869
- // "last value of IV1" and "penultimate value of IV2". So, verify that we
2870
- // don't already have an incoming value for the middle block.
2871
- if (PHI->getBasicBlockIndex (MiddleBlock) == -1 )
2872
- PHI->addIncoming (I.second , MiddleBlock);
2873
- }
2874
- }
2875
-
2876
2776
namespace {
2877
2777
2878
2778
struct CSEDenseMapInfo {
@@ -2999,24 +2899,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
2999
2899
for (PHINode &PN : Exit->phis ())
3000
2900
PSE.getSE ()->forgetLcssaPhiWithNewPredecessor (OrigLoop, &PN);
3001
2901
3002
- if (Cost->requiresScalarEpilogue (VF.isVector ())) {
3003
- // No edge from the middle block to the unique exit block has been inserted
3004
- // and there is nothing to fix from vector loop; phis should have incoming
3005
- // from scalar loop only.
3006
- } else {
3007
- // TODO: Check in VPlan to see if IV users need fixing instead of checking
3008
- // the cost model.
3009
-
3010
- // If we inserted an edge from the middle block to the unique exit block,
3011
- // update uses outside the loop (phis) to account for the newly inserted
3012
- // edge.
3013
-
3014
- // Fix-up external users of the induction variables.
3015
- for (const auto &Entry : Legal->getInductionVars ())
3016
- fixupIVUsers (Entry.first , Entry.second ,
3017
- getOrCreateVectorTripCount (nullptr ), LoopMiddleBlock, State);
3018
- }
3019
-
3020
2902
// Don't apply optimizations below when no vector region remains, as they all
3021
2903
// require a vector loop at the moment.
3022
2904
if (!State.Plan ->getVectorLoopRegion ())
@@ -9049,11 +8931,9 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
9049
8931
// / Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
9050
8932
// / induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
9051
8933
// / the end value of the induction.
9052
- static VPValue *addResumePhiRecipeForInduction (VPWidenInductionRecipe *WideIV,
9053
- VPBuilder &VectorPHBuilder,
9054
- VPBuilder &ScalarPHBuilder,
9055
- VPTypeAnalysis &TypeInfo,
9056
- VPValue *VectorTC) {
8934
+ static VPInstruction *addResumePhiRecipeForInduction (
8935
+ VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
8936
+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) {
9057
8937
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
9058
8938
// Truncated wide inductions resume from the last lane of their vector value
9059
8939
// in the last vector iteration which is handled elsewhere.
@@ -9087,8 +8967,10 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
9087
8967
9088
8968
// / Create resume phis in the scalar preheader for first-order recurrences,
9089
8969
// / reductions and inductions, and update the VPIRInstructions wrapping the
9090
- // / original phis in the scalar header.
9091
- static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan) {
8970
+ // / original phis in the scalar header. End values for inductions are added to
8971
+ // / \p IVEndValues.
8972
+ static void addScalarResumePhis (VPRecipeBuilder &Builder, VPlan &Plan,
8973
+ DenseMap<VPValue *, VPValue *> &IVEndValues) {
9092
8974
VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
9093
8975
auto *ScalarPH = Plan.getScalarPreheader ();
9094
8976
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor ());
@@ -9105,11 +8987,16 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
9105
8987
if (!ScalarPhiI)
9106
8988
break ;
9107
8989
8990
+ // TODO: Extract final value from induction recipe initially, optimize to
8991
+ // pre-computed end value together in optimizeInductionExitUsers.
9108
8992
auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder.getRecipe (ScalarPhiI));
9109
8993
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
9110
- if (VPValue *ResumePhi = addResumePhiRecipeForInduction (
8994
+ if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction (
9111
8995
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
9112
8996
&Plan.getVectorTripCount ())) {
8997
+ assert (ResumePhi->getOpcode () == VPInstruction::ResumePhi &&
8998
+ " Expected a ResumePhi" );
8999
+ IVEndValues[WideIVR] = ResumePhi->getOperand (0 );
9113
9000
ScalarPhiIRI->addOperand (ResumePhi);
9114
9001
continue ;
9115
9002
}
@@ -9140,65 +9027,6 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
9140
9027
}
9141
9028
}
9142
9029
9143
- // / Return true if \p VPV is an optimizable IV or IV use. That is, if \p VPV is
9144
- // / either an untruncated wide induction, or if it increments a wide induction
9145
- // / by its step.
9146
- static bool isOptimizableIVOrUse (VPValue *VPV) {
9147
- VPRecipeBase *Def = VPV->getDefiningRecipe ();
9148
- if (!Def)
9149
- return false ;
9150
- auto *WideIV = dyn_cast<VPWidenInductionRecipe>(Def);
9151
- if (WideIV) {
9152
- // VPV itself is a wide induction, separately compute the end value for exit
9153
- // users if it is not a truncated IV.
9154
- return isa<VPWidenPointerInductionRecipe>(WideIV) ||
9155
- !cast<VPWidenIntOrFpInductionRecipe>(WideIV)->getTruncInst ();
9156
- }
9157
-
9158
- // Check if VPV is an optimizable induction increment.
9159
- if (Def->getNumOperands () != 2 )
9160
- return false ;
9161
- WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand (0 ));
9162
- if (!WideIV)
9163
- WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand (1 ));
9164
- if (!WideIV)
9165
- return false ;
9166
-
9167
- using namespace VPlanPatternMatch ;
9168
- auto &ID = WideIV->getInductionDescriptor ();
9169
-
9170
- // Check if VPV increments the induction by the induction step.
9171
- VPValue *IVStep = WideIV->getStepValue ();
9172
- switch (ID.getInductionOpcode ()) {
9173
- case Instruction::Add:
9174
- return match (VPV, m_c_Binary<Instruction::Add>(m_Specific (WideIV),
9175
- m_Specific (IVStep)));
9176
- case Instruction::FAdd:
9177
- return match (VPV, m_c_Binary<Instruction::FAdd>(m_Specific (WideIV),
9178
- m_Specific (IVStep)));
9179
- case Instruction::FSub:
9180
- return match (VPV, m_Binary<Instruction::FSub>(m_Specific (WideIV),
9181
- m_Specific (IVStep)));
9182
- case Instruction::Sub: {
9183
- // IVStep will be the negated step of the subtraction. Check if Step == -1 *
9184
- // IVStep.
9185
- VPValue *Step;
9186
- if (!match (VPV, m_Binary<Instruction::Sub>(m_VPValue (), m_VPValue (Step))) ||
9187
- !Step->isLiveIn () || !IVStep->isLiveIn ())
9188
- return false ;
9189
- auto *StepCI = dyn_cast<ConstantInt>(Step->getLiveInIRValue ());
9190
- auto *IVStepCI = dyn_cast<ConstantInt>(IVStep->getLiveInIRValue ());
9191
- return StepCI && IVStepCI &&
9192
- StepCI->getValue () == (-1 * IVStepCI->getValue ());
9193
- }
9194
- default :
9195
- return ID.getKind () == InductionDescriptor::IK_PtrInduction &&
9196
- match (VPV, m_GetElementPtr (m_Specific (WideIV),
9197
- m_Specific (WideIV->getStepValue ())));
9198
- }
9199
- llvm_unreachable (" should have been covered by switch above" );
9200
- }
9201
-
9202
9030
// Collect VPIRInstructions for phis in the exit blocks that are modeled
9203
9031
// in VPlan and add the exiting VPValue as operand. Some exiting values are not
9204
9032
// modeled explicitly yet and won't be included. Those are un-truncated
@@ -9228,12 +9056,6 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
9228
9056
}
9229
9057
Value *IncomingValue = ExitPhi->getIncomingValueForBlock (ExitingBB);
9230
9058
VPValue *V = Builder.getVPValueOrAddLiveIn (IncomingValue);
9231
- // Exit values for inductions are computed and updated outside of VPlan
9232
- // and independent of induction recipes.
9233
- // TODO: Compute induction exit values in VPlan.
9234
- if (isOptimizableIVOrUse (V) &&
9235
- ExitVPBB->getSinglePredecessor () == MiddleVPBB)
9236
- continue ;
9237
9059
ExitUsersToFix.insert (ExitIRI);
9238
9060
ExitIRI->addOperand (V);
9239
9061
}
@@ -9253,6 +9075,7 @@ addUsersInExitBlocks(VPlan &Plan,
9253
9075
9254
9076
auto *MiddleVPBB = Plan.getMiddleBlock ();
9255
9077
VPBuilder B (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
9078
+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
9256
9079
9257
9080
// Introduce extract for exiting values and update the VPIRInstructions
9258
9081
// modeling the corresponding LCSSA phis.
@@ -9574,7 +9397,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9574
9397
VPlanTransforms::handleUncountableEarlyExit (
9575
9398
*Plan, *PSE.getSE (), OrigLoop, UncountableExitingBlock, RecipeBuilder);
9576
9399
}
9577
- addScalarResumePhis (RecipeBuilder, *Plan);
9400
+ DenseMap<VPValue *, VPValue *> IVEndValues;
9401
+ addScalarResumePhis (RecipeBuilder, *Plan, IVEndValues);
9578
9402
SetVector<VPIRInstruction *> ExitUsersToFix =
9579
9403
collectUsersInExitBlocks (OrigLoop, RecipeBuilder, *Plan);
9580
9404
addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
@@ -9657,6 +9481,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9657
9481
VPlanTransforms::addActiveLaneMask (*Plan, ForControlFlow,
9658
9482
WithoutRuntimeCheck);
9659
9483
}
9484
+ VPlanTransforms::optimizeInductionExitUsers (*Plan, IVEndValues);
9660
9485
9661
9486
assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
9662
9487
return Plan;
@@ -9708,7 +9533,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9708
9533
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
9709
9534
RecipeBuilder.setRecipe (HeaderR->getUnderlyingInstr (), HeaderR);
9710
9535
}
9711
- addScalarResumePhis (RecipeBuilder, *Plan);
9536
+ DenseMap<VPValue *, VPValue *> IVEndValues;
9537
+ // TODO: IVEndValues are not yet used in the native path to optimize exit
9538
+ // values.
9539
+ addScalarResumePhis (RecipeBuilder, *Plan, IVEndValues);
9712
9540
9713
9541
assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
9714
9542
return Plan;
0 commit comments