Skip to content

Commit bc03d6c

Browse files
authored
[VPlan] Introduce all loop regions as VPlan transform. (NFC) (llvm#129402)
Further simplify the VPlan CFG builder by moving the introduction of inner regions to a VPlan transform, building on llvm#128419. The HCFG builder now only constructs plain CFGs. I will move it to VPlanConstruction as a follow-up. Depends on llvm#128419. PR: llvm#129402
1 parent 2d63fae commit bc03d6c

File tree

10 files changed

+149
-198
lines changed

10 files changed

+149
-198
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9544,14 +9544,14 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95449544
Range);
95459545
auto Plan = std::make_unique<VPlan>(OrigLoop);
95469546
// Build hierarchical CFG.
9547-
// Convert to VPlan-transform and consoliate all transforms for VPlan
9547+
// TODO: Convert to VPlan-transform and consolidate all transforms for VPlan
95489548
// creation.
95499549
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
9550-
HCFGBuilder.buildHierarchicalCFG();
9550+
HCFGBuilder.buildPlainCFG();
95519551

9552-
VPlanTransforms::introduceTopLevelVectorLoopRegion(
9553-
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
9554-
CM.foldTailByMasking(), OrigLoop);
9552+
VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(),
9553+
PSE, RequiresScalarEpilogueCheck,
9554+
CM.foldTailByMasking(), OrigLoop);
95559555

95569556
// Don't use getDecisionAndClampRange here, because we don't know the UF
95579557
// so this function is better to be conservative, rather than to split
@@ -9851,10 +9851,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
98519851
auto Plan = std::make_unique<VPlan>(OrigLoop);
98529852
// Build hierarchical CFG
98539853
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
9854-
HCFGBuilder.buildHierarchicalCFG();
9854+
HCFGBuilder.buildPlainCFG();
98559855

9856-
VPlanTransforms::introduceTopLevelVectorLoopRegion(
9857-
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
9856+
VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(),
9857+
PSE, true, false, OrigLoop);
98589858

98599859
for (ElementCount VF : Range)
98609860
Plan->addVF(VF);

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class SCEV;
5757
class Type;
5858
class VPBasicBlock;
5959
class VPBuilder;
60+
class VPDominatorTree;
6061
class VPRegionBlock;
6162
class VPlan;
6263
class VPLane;
@@ -303,6 +304,13 @@ class VPBlockBase {
303304
/// Remove all the successors of this block.
304305
void clearSuccessors() { Successors.clear(); }
305306

307+
/// Swap predecessors of the block. The block must have exactly 2
308+
/// predecessors.
309+
void swapPredecessors() {
310+
assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
311+
std::swap(Predecessors[0], Predecessors[1]);
312+
}
313+
306314
/// Swap successors of the block. The block must have exactly 2 successors.
307315
// TODO: This should be part of introducing conditional branch recipes rather
308316
// than being independent.

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 78 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,26 +14,88 @@
1414
#include "LoopVectorizationPlanner.h"
1515
#include "VPlan.h"
1616
#include "VPlanCFG.h"
17+
#include "VPlanDominatorTree.h"
1718
#include "VPlanTransforms.h"
1819
#include "llvm/Analysis/LoopInfo.h"
1920
#include "llvm/Analysis/ScalarEvolution.h"
2021

2122
using namespace llvm;
2223

23-
void VPlanTransforms::introduceTopLevelVectorLoopRegion(
24-
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
25-
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
26-
// TODO: Generalize to introduce all loop regions.
27-
auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
28-
VPBlockUtils::disconnectBlocks(Plan.getEntry(), HeaderVPBB);
24+
/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
25+
/// has exactly 2 predecessors (preheader and latch), where the block
26+
/// dominates the latch and the preheader dominates the block. If it is a
27+
/// header block return true, making sure the preheader appears first and
28+
/// the latch second. Otherwise return false.
29+
static bool canonicalHeader(VPBlockBase *HeaderVPB,
30+
const VPDominatorTree &VPDT) {
31+
ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
32+
if (Preds.size() != 2)
33+
return false;
2934

30-
VPBasicBlock *OriginalLatch =
31-
cast<VPBasicBlock>(HeaderVPBB->getSinglePredecessor());
32-
VPBlockUtils::disconnectBlocks(OriginalLatch, HeaderVPBB);
33-
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
34-
VPBlockUtils::connectBlocks(Plan.getEntry(), VecPreheader);
35-
assert(OriginalLatch->getNumSuccessors() == 0 &&
36-
"Plan should end at top level latch");
35+
auto *PreheaderVPBB = Preds[0];
36+
auto *LatchVPBB = Preds[1];
37+
if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
38+
VPDT.dominates(HeaderVPB, LatchVPBB))
39+
return true;
40+
41+
std::swap(PreheaderVPBB, LatchVPBB);
42+
43+
if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
44+
VPDT.dominates(HeaderVPB, LatchVPBB)) {
45+
// Canonicalize predecessors of header so that preheader is first and latch
46+
// second.
47+
HeaderVPB->swapPredecessors();
48+
for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
49+
R.swapOperands();
50+
return true;
51+
}
52+
53+
return false;
54+
}
55+
56+
/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
57+
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
58+
auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
59+
auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
60+
61+
VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
62+
VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
63+
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
64+
assert(LatchVPBB->getNumSuccessors() <= 1 &&
65+
"Latch has more than one successor");
66+
if (Succ)
67+
VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
68+
69+
auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "",
70+
false /*isReplicator*/);
71+
R->setParent(HeaderVPB->getParent());
72+
// All VPBB's reachable shallowly from HeaderVPB belong to top level loop,
73+
// because VPlan is expected to end at top level latch disconnected above.
74+
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
75+
VPBB->setParent(R);
76+
77+
VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
78+
if (Succ)
79+
VPBlockUtils::connectBlocks(R, Succ);
80+
}
81+
82+
void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
83+
PredicatedScalarEvolution &PSE,
84+
bool RequiresScalarEpilogueCheck,
85+
bool TailFolded, Loop *TheLoop) {
86+
VPDominatorTree VPDT;
87+
VPDT.recalculate(Plan);
88+
for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry()))
89+
if (canonicalHeader(HeaderVPB, VPDT))
90+
createLoopRegion(Plan, HeaderVPB);
91+
92+
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
93+
auto *OrigExiting = TopRegion->getExiting();
94+
VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
95+
VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
96+
TopRegion->setExiting(LatchVPBB);
97+
TopRegion->setName("vector loop");
98+
TopRegion->getEntryBasicBlock()->setName("vector.body");
3799

38100
// Create SCEV and VPValue for the trip count.
39101
// We use the symbolic max backedge-taken-count, which works also when
@@ -47,18 +109,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
47109
Plan.setTripCount(
48110
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE));
49111

50-
// Create VPRegionBlock, with existing header and new empty latch block, to be
51-
// filled.
52-
VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
53-
VPBlockUtils::insertBlockAfter(LatchVPBB, OriginalLatch);
54-
auto *TopRegion = Plan.createVPRegionBlock(
55-
HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/);
56-
// All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
57-
// because VPlan is expected to end at top level latch.
58-
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
59-
VPBB->setParent(TopRegion);
60-
61-
VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
112+
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
113+
VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
114+
62115
VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
63116
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
64117

0 commit comments

Comments
 (0)