@@ -160,6 +160,10 @@ namespace {
160
160
161
161
/// Process all of the top-level loops in the function in post-order.
162
162
void processLoops () {
163
+ if (auto outs = getTFDumpIntermediateStream ()) {
164
+ dumpTopLevelLoopInfo (outs, " Before" );
165
+ }
166
+
163
167
// Apply the standard SIL loop canonicalization transformations. This
164
168
// automatically gives us the following invariants: loops are guaranteed
165
169
// to have a single preheader, a single backedge block, and exit??
@@ -180,6 +184,11 @@ namespace {
180
184
181
185
for (auto *loop : LI)
182
186
processLoop (loop);
187
+
188
+ if (auto outs = getTFDumpIntermediateStream ()) {
189
+ dumpTopLevelLoopInfo (outs, " After" );
190
+ }
191
+
183
192
}
184
193
185
194
@@ -189,6 +198,18 @@ namespace {
189
198
SILBasicBlock *endBB);
190
199
void processLoop (SILLoop *loop);
191
200
void ensureSingleExitFromLoops ();
201
+
202
+ // Dump top-level loop information for debugging purposes.
203
+ void dumpTopLevelLoopInfo (llvm::raw_ostream* outs, const char * stage) {
204
+ *outs << " --- XLA CFG Loops " << stage << " Canonicalize: " << F->getName ()
205
+ << " \n " ;
206
+ for (auto *loop : LI.getTopLevelLoops ()) {
207
+ loop->print (*outs);
208
+ }
209
+ *outs << " \n --- XLA CFG Loops " << stage << " Canonicalize end\n " ;
210
+ outs->flush ();
211
+ }
212
+
192
213
};
193
214
} // end anonymous namespace
194
215
@@ -391,13 +412,102 @@ class BasicBlockCloner : public SILClonerWithScopes<BasicBlockCloner> {
391
412
return Value;
392
413
}
393
414
394
- // Update ValueMap so that occurrences of `oldValue` are replaced with
395
- // `newValue` when cloning.
415
+ // / Update ValueMap so that occurrences of `oldValue` are replaced with
416
+ // / `newValue` when cloning.
396
417
void updateValueMap(SILValue oldValue, SILValue newValue) {
  // try_emplace reports through `.second` whether a new entry was created.
  // Each key is expected to be registered at most once, so a failed insertion
  // is treated as a programming error.
  const bool insertedFreshEntry =
      ValueMap.try_emplace(oldValue, newValue).second;
  assert(insertedFreshEntry && " Updating the same key in ValueMap multiple "
                               " times during SESE cloning.");
}
422
+
423
+ // / Clone the body of `loop` starting from `startBlock` and nest the cloned
424
+ // / fragment into the parent loop. If `startBlock` is the same as the header
425
+ // / of `loop`, we clone the entire loop including the back edge. Otherwise,
426
+ // / we clone one iteration of the loop body without the back edge.
427
+ SILLoop *cloneLoop (SILLoopInfo *LI, SILLoop *loop, SILBasicBlock *startBlock) {
428
+ llvm::DenseMap<SILLoop*, SILLoop*> loopClones;
429
+ // This is for convenience as top-level loops have nullptr for parent loop.
430
+ loopClones[nullptr ] = nullptr ;
431
+
432
+ SmallVector<SILLoop *, 4 > loops = LI->getBase ().getLoopsInPreorder ();
433
+ auto loopIter = loops.begin ();
434
+
435
+ // Skip until we get to our loop in the pre-order.
436
+ while (loopIter != loops.end () && *loopIter != loop) {
437
+ loopIter++;
438
+ }
439
+ auto nestingLoop = loop->getParentLoop ();
440
+ if (loop->getHeader () == startBlock) {
441
+ // If header is the start block, we are cloning the entire
442
+ // loop. Therefore, we should create a new SILLoop.
443
+ SILLoop *loopClone = LI->getBase ().AllocateLoop ();
444
+ if (nestingLoop) {
445
+ nestingLoop->addChildLoop (loopClone);
446
+ } else {
447
+ LI->addTopLevelLoop (loopClone);
448
+ }
449
+ loopClones[loop] = loopClone;
450
+ } else {
451
+ // We are not cloning the entire loop. Place cloned blocks in the
452
+ // `outerLoop` instead.
453
+ loopClones[loop] = nestingLoop;
454
+ }
455
+
456
+ // Move to the next loop.
457
+ ++loopIter;
458
+
459
+ // Create the loop nesting structure of the current loop's body by iterating
460
+ // over all the loops nested within `loop` and creating empty clones. We
461
+ // need do this first so that when we add a block to an inner loop using
462
+ // `addBasicBlockToLoop`, it gets added to the parent loops as well.
463
+ for (/* iterators initialized*/ ; loopIter != loops.end (); ++loopIter) {
464
+ SILLoop *curLoop = *loopIter;
465
+ SILLoop *parentLoop = curLoop->getParentLoop ();
466
+ // Break if we have reached the same nesting depth as `loop`, which
467
+ // implies that we have visited all the subloops of `loop`.
468
+ if (parentLoop == nestingLoop) break ;
469
+ SILLoop *loopClone = LI->getBase ().AllocateLoop ();
470
+ SILLoop *parentLoopClone = loopClones[parentLoop];
471
+ if (parentLoopClone) {
472
+ parentLoopClone->addChildLoop (loopClone);
473
+ } else {
474
+ LI->addTopLevelLoop (loopClone);
475
+ }
476
+ loopClones[curLoop] = loopClone;
477
+ }
478
+
479
+ // Clone the body of the loop starting from the given startBlock. We should
480
+ // traverse the blocks in depth first order to ensure values are cloned
481
+ // before they are used.
482
+ SmallPtrSet<SILBasicBlock *, 32 > worklist;
483
+ SmallVector<SILBasicBlock *, 32 > initializedBlocks;
484
+ worklist.insert (startBlock);
485
+ while (!worklist.empty ()) {
486
+ SILBasicBlock *current = *worklist.begin ();
487
+ worklist.erase (current);
488
+ initBlock (current);
489
+ initializedBlocks.push_back (current);
490
+ for (SILBasicBlock *succ : current->getSuccessorBlocks ()) {
491
+ // Skip if succ is not a part of the loop, is already initialized, or
492
+ // is the header.
493
+ if (!loop->contains (succ) || remapBasicBlock (succ) != succ ||
494
+ succ == loop->getHeader ()) {
495
+ continue ;
496
+ }
497
+ worklist.insert (succ);
498
+ }
499
+ }
500
+ for (SILBasicBlock *bb : initializedBlocks) {
501
+ SILBasicBlock *clonedBlock = cloneBlock (bb);
502
+ if (SILLoop *loopClone = loopClones[LI->getLoopFor (bb)]) {
503
+ loopClone->addBasicBlockToLoop (clonedBlock, LI->getBase ());
504
+ if (LI->getLoopFor (bb)->getHeader () == bb) {
505
+ loopClone->moveToHeader (clonedBlock);
506
+ }
507
+ }
508
+ }
509
+ return loopClones[loop];
510
+ }
401
511
};
402
512
403
513
} // namespace
@@ -1302,34 +1412,9 @@ void SingleExitLoopTransformer::unrollLoopBodyOnce() {
1302
1412
auto newHeaderArg = newHeader->getArgument (argIndex);
1303
1413
cloner.updateValueMap (newHeaderArg, preheaderArg);
1304
1414
}
1305
- // Clone everything except the new header. We should traverse the
1306
- // blocks in depth first order to ensure values are cloned before they are used.
1307
- SmallPtrSet<SILBasicBlock *, 32 > worklist;
1308
- SmallVector<SILBasicBlock *, 32 > initializedBlocks;
1309
- worklist.insert (header);
1310
- while (!worklist.empty ()) {
1311
- SILBasicBlock *current = *worklist.begin ();
1312
- worklist.erase (current);
1313
- cloner.initBlock (current);
1314
- initializedBlocks.push_back (current);
1315
- for (SILBasicBlock *succ : current->getSuccessorBlocks ()) {
1316
- // Skip if succ is not a part of the loop, is already cloned, or
1317
- // is the new preheader.
1318
- if (!loop->contains (succ) || cloner.remapBasicBlock (succ) != succ ||
1319
- succ == newHeader) {
1320
- continue ;
1321
- }
1322
- worklist.insert (succ);
1323
- }
1324
- }
1325
1415
1326
- SILLoop *parentLoop = loop->getParentLoop ();
1327
- for (SILBasicBlock *bb : initializedBlocks) {
1328
- SILBasicBlock *clonedBlock = cloner.cloneBlock (bb);
1329
- if (parentLoop) {
1330
- parentLoop->addBasicBlockToLoop (clonedBlock, LI->getBase ());
1331
- }
1332
- }
1416
+ // Clone everything starting from the old header.
1417
+ cloner.cloneLoop (LI, loop, header);
1333
1418
1334
1419
// Get the clone for old header.
1335
1420
SILBasicBlock *clonedOldHeader = cloner.remapBasicBlock (header);
0 commit comments