Skip to content

Commit 897d8ee

Browse files
committed
[StructurizeCFG] Fix region nodes ordering
This is a reimplementation of the `orderNodes` function, as the old implementation didn't take into account all cases. Fix PR41509 Differential Revision: https://reviews.llvm.org/D79037
1 parent f61f6ff commit 897d8ee

File tree

2 files changed

+351
-47
lines changed

2 files changed

+351
-47
lines changed

llvm/lib/Transforms/Scalar/StructurizeCFG.cpp

+89-47
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,6 @@ class StructurizeCFG : public RegionPass {
215215
void orderNodes();
216216

217217
Loop *getAdjustedLoop(RegionNode *RN);
218-
unsigned getAdjustedLoopDepth(RegionNode *RN);
219218

220219
void analyzeLoops(RegionNode *N);
221220

@@ -324,65 +323,108 @@ Loop *StructurizeCFG::getAdjustedLoop(RegionNode *RN) {
324323
return LI->getLoopFor(RN->getEntry());
325324
}
326325

327-
/// Use the exit block to determine the loop depth if RN is a SubRegion.
328-
unsigned StructurizeCFG::getAdjustedLoopDepth(RegionNode *RN) {
329-
if (RN->isSubRegion()) {
330-
Region *SubR = RN->getNodeAs<Region>();
331-
return LI->getLoopDepth(SubR->getExit());
332-
}
333-
334-
return LI->getLoopDepth(RN->getEntry());
335-
}
336-
337-
/// Build up the general order of nodes
326+
/// Build up the general order of nodes, by performing a topology sort of the
327+
/// parent region's nodes, while ensuring that there is no outer loop node
328+
/// between any two inner loop nodes.
338329
void StructurizeCFG::orderNodes() {
339-
ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
340-
SmallDenseMap<Loop*, unsigned, 8> LoopBlocks;
330+
SmallVector<RegionNode *, 32> POT;
331+
SmallDenseMap<Loop *, unsigned, 8> LoopSizes;
332+
for (RegionNode *RN : post_order(ParentRegion)) {
333+
POT.push_back(RN);
341334

342-
// The reverse post-order traversal of the list gives us an ordering close
343-
// to what we want. The only problem with it is that sometimes backedges
344-
// for outer loops will be visited before backedges for inner loops.
345-
for (RegionNode *RN : RPOT) {
335+
// Accumulate the number of nodes inside the region that belong to a loop.
346336
Loop *Loop = getAdjustedLoop(RN);
347-
++LoopBlocks[Loop];
337+
++LoopSizes[Loop];
348338
}
349-
350-
unsigned CurrentLoopDepth = 0;
339+
// A quick exit for the case where all nodes belong to the same loop (or no
340+
// loop at all).
341+
if (LoopSizes.size() <= 1U) {
342+
Order.assign(POT.begin(), POT.end());
343+
return;
344+
}
345+
Order.resize(POT.size());
346+
347+
// The post-order traversal of the list gives us an ordering close to what we
348+
// want. The only problem with it is that sometimes backedges for outer loops
349+
// will be visited before backedges for inner loops. So now we fix that by
350+
// inserting the nodes in order, while making sure that encountered inner loop
351+
// are complete before their parents (outer loops).
352+
353+
SmallVector<Loop *, 8> WorkList;
354+
// Get the size of the outermost region (the nodes that don't belong to any
355+
// loop inside ParentRegion).
356+
unsigned ZeroCurrentLoopSize = 0U;
357+
auto LSI = LoopSizes.find(nullptr);
358+
unsigned *CurrentLoopSize =
359+
LSI != LoopSizes.end() ? &LSI->second : &ZeroCurrentLoopSize;
351360
Loop *CurrentLoop = nullptr;
352-
for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
353-
RegionNode *RN = cast<RegionNode>(*I);
354-
unsigned LoopDepth = getAdjustedLoopDepth(RN);
355361

356-
if (is_contained(Order, *I))
357-
continue;
362+
// The "skipped" list is actually located at the (reversed) beginning of the
363+
// POT. This saves us the use of an intermediate container.
364+
// Note that there is always enough room, for the skipped nodes, before the
365+
// current location, as we have just passed at least that amount of nodes.
366+
367+
auto Begin = POT.rbegin();
368+
auto I = Begin, SkippedEnd = Begin;
369+
auto O = Order.rbegin(), OE = Order.rend();
370+
while (O != OE) {
371+
// If we have any skipped nodes, then erase the gap between the end of the
372+
// "skipped" list, and the current location.
373+
if (SkippedEnd != Begin) {
374+
POT.erase(I.base(), SkippedEnd.base());
375+
I = SkippedEnd = Begin = POT.rbegin();
376+
}
358377

359-
if (LoopDepth < CurrentLoopDepth) {
360-
// Make sure we have visited all blocks in this loop before moving back to
361-
// the outer loop.
378+
// Keep processing outer loops, in order (from inner most, to outer).
379+
if (!WorkList.empty()) {
380+
CurrentLoop = WorkList.pop_back_val();
381+
CurrentLoopSize = &LoopSizes.find(CurrentLoop)->second;
382+
}
362383

363-
auto LoopI = I;
364-
while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) {
365-
LoopI++;
366-
if (getAdjustedLoop(cast<RegionNode>(*LoopI)) == CurrentLoop) {
367-
--BlockCount;
368-
Order.push_back(*LoopI);
384+
// Keep processing loops while only going deeper (into inner loops).
385+
do {
386+
assert(I != POT.rend());
387+
RegionNode *RN = *I++;
388+
389+
Loop *L = getAdjustedLoop(RN);
390+
if (L != CurrentLoop) {
391+
// If L is a loop inside CurrentLoop, then CurrentLoop must be the
392+
// parent of L.
393+
// To prove this, we will contradict the opposite:
394+
// Let P be the parent of L. If CurrentLoop is the parent of P, then
395+
// the header of P must have been processed already, as it must
396+
// dominate the other blocks of P (otherwise P is an irreducible loop,
397+
// and won't be recorded in the LoopInfo), especially L (inside). But
398+
// then CurrentLoop must have been updated to P at the time of
399+
// processing the header of P, which conflicts with the assumption
400+
// that CurrentLoop is not P.
401+
402+
// If L is not a loop inside CurrentLoop, then skip RN.
403+
if (!L || L->getParentLoop() != CurrentLoop) {
404+
// Skip the node by pushing it to the end of the "skipped" list.
405+
*SkippedEnd++ = RN;
406+
continue;
369407
}
408+
409+
// If we still haven't processed all the nodes that belong to
410+
// CurrentLoop, then make sure we come back later, to finish the job, by
411+
// pushing it to the WorkList.
412+
if (*CurrentLoopSize)
413+
WorkList.push_back(CurrentLoop);
414+
415+
CurrentLoop = L;
416+
CurrentLoopSize = &LoopSizes.find(CurrentLoop)->second;
370417
}
371-
}
372418

373-
CurrentLoop = getAdjustedLoop(RN);
374-
if (CurrentLoop)
375-
LoopBlocks[CurrentLoop]--;
419+
assert(O != OE);
420+
*O++ = RN;
376421

377-
CurrentLoopDepth = LoopDepth;
378-
Order.push_back(*I);
422+
// If we have finished processing the current loop, then we are done here.
423+
--*CurrentLoopSize;
424+
} while (*CurrentLoopSize);
379425
}
380-
381-
// This pass originally used a post-order traversal and then operated on
382-
// the list in reverse. Now that we are using a reverse post-order traversal
383-
// rather than re-working the whole pass to operate on the list in order,
384-
// we just reverse the list and continue to operate on it in reverse.
385-
std::reverse(Order.begin(), Order.end());
426+
assert(WorkList.empty());
427+
assert(SkippedEnd == Begin);
386428
}
387429

388430
/// Determine the end of the loops

0 commit comments

Comments
 (0)