Skip to content

Commit b7f2fda

Browse files
authored
SESE: update LoopInfo incrementally when cloning a loop body with nested loops. (#20167)
1 parent 7f79ae5 commit b7f2fda

File tree

2 files changed

+330
-29
lines changed

2 files changed

+330
-29
lines changed

lib/SILOptimizer/Mandatory/TFCanonicalizeCFG.cpp

Lines changed: 114 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -160,6 +160,10 @@ namespace {
160160

161161
/// Process all of the top-level loops in the function in post-order.
162162
void processLoops() {
163+
if (auto outs = getTFDumpIntermediateStream()) {
164+
dumpTopLevelLoopInfo(outs, "Before");
165+
}
166+
163167
// Apply the standard SIL loop canonicalization transformations. This
164168
// automatically gives us the following invariants: loops are guaranteed
165169
// to have a single preheader, a single backedge block, and exit??
@@ -180,6 +184,11 @@ namespace {
180184

181185
for (auto *loop : LI)
182186
processLoop(loop);
187+
188+
if (auto outs = getTFDumpIntermediateStream()) {
189+
dumpTopLevelLoopInfo(outs, "After");
190+
}
191+
183192
}
184193

185194

@@ -189,6 +198,18 @@ namespace {
189198
SILBasicBlock *endBB);
190199
void processLoop(SILLoop *loop);
191200
void ensureSingleExitFromLoops();
201+
202+
// Dump top-level loop information for debugging purposes.
203+
void dumpTopLevelLoopInfo(llvm::raw_ostream* outs, const char* stage) {
204+
*outs << "--- XLA CFG Loops " << stage << " Canonicalize: " << F->getName()
205+
<< "\n";
206+
for (auto *loop : LI.getTopLevelLoops()) {
207+
loop->print(*outs);
208+
}
209+
*outs << "\n--- XLA CFG Loops " << stage << " Canonicalize end\n";
210+
outs->flush();
211+
}
212+
192213
};
193214
} // end anonymous namespace
194215

@@ -391,13 +412,102 @@ class BasicBlockCloner : public SILClonerWithScopes<BasicBlockCloner> {
391412
return Value;
392413
}
393414

394-
// Update ValueMap so that occurrences of `oldValue` are replaced with
395-
// `newValue` when cloning.
415+
/// Update ValueMap so that occurrences of `oldValue` are replaced with
416+
/// `newValue` when cloning.
396417
void updateValueMap(SILValue oldValue, SILValue newValue) {
397418
auto emplaceResult = ValueMap.try_emplace(oldValue, newValue);
398419
assert(emplaceResult.second && "Updating the same key in ValueMap multiple "
399420
"times during SESE cloning.");
400421
}
422+
423+
/// Clone the body of `loop` starting from `startBlock` and nest the cloned
424+
/// fragment into the parent loop. If `startBlock` is the same as the header
425+
/// of `loop`, we clone the entire loop including the back edge. Otherwise,
426+
/// we clone one iteration of the loop body without the back edge.
427+
SILLoop *cloneLoop(SILLoopInfo *LI, SILLoop *loop, SILBasicBlock *startBlock) {
428+
llvm::DenseMap<SILLoop*, SILLoop*> loopClones;
429+
// This is for convenience as top-level loops have nullptr for parent loop.
430+
loopClones[nullptr] = nullptr;
431+
432+
SmallVector<SILLoop *, 4> loops = LI->getBase().getLoopsInPreorder();
433+
auto loopIter = loops.begin();
434+
435+
// Skip until we get to our loop in the pre-order.
436+
while (loopIter != loops.end() && *loopIter != loop) {
437+
loopIter++;
438+
}
439+
auto nestingLoop = loop->getParentLoop();
440+
if (loop->getHeader() == startBlock) {
441+
// If header is the start block, we are cloning the entire
442+
// loop. Therefore, we should create a new SILLoop.
443+
SILLoop *loopClone = LI->getBase().AllocateLoop();
444+
if (nestingLoop) {
445+
nestingLoop->addChildLoop(loopClone);
446+
} else {
447+
LI->addTopLevelLoop(loopClone);
448+
}
449+
loopClones[loop] = loopClone;
450+
} else {
451+
// We are not cloning the entire loop. Place cloned blocks in the
452+
// `outerLoop` instead.
453+
loopClones[loop] = nestingLoop;
454+
}
455+
456+
// Move to the next loop.
457+
++loopIter;
458+
459+
// Create the loop nesting structure of the current loop's body by iterating
460+
// over all the loops nested within `loop` and creating empty clones. We
461+
// need do this first so that when we add a block to an inner loop using
462+
// `addBasicBlockToLoop`, it gets added to the parent loops as well.
463+
for (/*iterators initialized*/; loopIter != loops.end(); ++loopIter) {
464+
SILLoop *curLoop = *loopIter;
465+
SILLoop *parentLoop = curLoop->getParentLoop();
466+
// Break if we have reached the same nesting depth as `loop`, which
467+
// implies that we have visited all the subloops of `loop`.
468+
if (parentLoop == nestingLoop) break;
469+
SILLoop *loopClone = LI->getBase().AllocateLoop();
470+
SILLoop *parentLoopClone = loopClones[parentLoop];
471+
if (parentLoopClone) {
472+
parentLoopClone->addChildLoop(loopClone);
473+
} else {
474+
LI->addTopLevelLoop(loopClone);
475+
}
476+
loopClones[curLoop] = loopClone;
477+
}
478+
479+
// Clone the body of the loop starting from the given startBlock. We should
480+
// traverse the blocks in depth first order to ensure values are cloned
481+
// before they are used.
482+
SmallPtrSet<SILBasicBlock *, 32> worklist;
483+
SmallVector<SILBasicBlock *, 32> initializedBlocks;
484+
worklist.insert(startBlock);
485+
while (!worklist.empty()) {
486+
SILBasicBlock *current = *worklist.begin();
487+
worklist.erase(current);
488+
initBlock(current);
489+
initializedBlocks.push_back(current);
490+
for (SILBasicBlock *succ : current->getSuccessorBlocks()) {
491+
// Skip if succ is not a part of the loop, is already initialized, or
492+
// is the header.
493+
if (!loop->contains(succ) || remapBasicBlock(succ) != succ ||
494+
succ == loop->getHeader()) {
495+
continue;
496+
}
497+
worklist.insert(succ);
498+
}
499+
}
500+
for (SILBasicBlock *bb : initializedBlocks) {
501+
SILBasicBlock *clonedBlock = cloneBlock(bb);
502+
if (SILLoop *loopClone = loopClones[LI->getLoopFor(bb)]) {
503+
loopClone->addBasicBlockToLoop(clonedBlock, LI->getBase());
504+
if (LI->getLoopFor(bb)->getHeader() == bb) {
505+
loopClone->moveToHeader(clonedBlock);
506+
}
507+
}
508+
}
509+
return loopClones[loop];
510+
}
401511
};
402512

403513
} // namespace
@@ -1302,34 +1412,9 @@ void SingleExitLoopTransformer::unrollLoopBodyOnce() {
13021412
auto newHeaderArg = newHeader->getArgument(argIndex);
13031413
cloner.updateValueMap(newHeaderArg, preheaderArg);
13041414
}
1305-
// Clone everything except the new header. We should traverse the
1306-
// blocks in depth first order to ensure values are cloned before they are used.
1307-
SmallPtrSet<SILBasicBlock *, 32> worklist;
1308-
SmallVector<SILBasicBlock *, 32> initializedBlocks;
1309-
worklist.insert(header);
1310-
while (!worklist.empty()) {
1311-
SILBasicBlock *current = *worklist.begin();
1312-
worklist.erase(current);
1313-
cloner.initBlock(current);
1314-
initializedBlocks.push_back(current);
1315-
for (SILBasicBlock *succ : current->getSuccessorBlocks()) {
1316-
// Skip if succ is not a part of the loop, is already cloned, or
1317-
// is the new preheader.
1318-
if (!loop->contains(succ) || cloner.remapBasicBlock(succ) != succ ||
1319-
succ == newHeader) {
1320-
continue;
1321-
}
1322-
worklist.insert(succ);
1323-
}
1324-
}
13251415

1326-
SILLoop *parentLoop = loop->getParentLoop();
1327-
for (SILBasicBlock *bb : initializedBlocks) {
1328-
SILBasicBlock *clonedBlock = cloner.cloneBlock(bb);
1329-
if (parentLoop) {
1330-
parentLoop->addBasicBlockToLoop(clonedBlock, LI->getBase());
1331-
}
1332-
}
1416+
// Clone everything starting from the old header.
1417+
cloner.cloneLoop(LI, loop, header);
13331418

13341419
// Get the clone for old header.
13351420
SILBasicBlock *clonedOldHeader = cloner.remapBasicBlock(header);

0 commit comments

Comments
 (0)