swiftlang · atrick · Jan 28, 2021 · Jan 27, 2021 · Jan 27, 2021
@@ -78,6 +78,10 @@ class SimplifyCFG {
   llvm::SmallDenseMap<SILBasicBlock *, unsigned, 32> WorklistMap;
   // Keep track of loop headers - we don't want to jump-thread through them.
   SmallPtrSet<SILBasicBlock *, 32> LoopHeaders;
+  // The set of cloned loop headers to avoid infinite loop peeling. Blocks in
+  // this set may or may not still be LoopHeaders.
+  // (ultimately this can be used to eliminate findLoopHeaders)
+  SmallPtrSet<SILBasicBlock *, 4> ClonedLoopHeaders;
   // The cost (~ number of copied instructions) of jump threading per basic
   // block. Used to prevent infinite jump threading loops.
   llvm::SmallDenseMap<SILBasicBlock *, int, 8> JumpThreadingCost;
@@ -125,6 +129,16 @@ class SimplifyCFG {
   }
 
 private:
+  // Called when \p newBlock inherits the former predecessors of \p oldBlock.
+  // Any membership \p oldBlock has in LoopHeaders or ClonedLoopHeaders is
+  // copied to \p newBlock; entries for \p oldBlock itself are left untouched.
+  void substitutedBlockPreds(SILBasicBlock *oldBlock, SILBasicBlock *newBlock) {
+    if (LoopHeaders.count(oldBlock))
+      LoopHeaders.insert(newBlock);
+    if (ClonedLoopHeaders.count(oldBlock))
+      ClonedLoopHeaders.insert(newBlock);
+  }
+
   void clearWorklist() {
     WorklistMap.clear();
     WorklistList.clear();
@@ -170,8 +184,10 @@ class SimplifyCFG {
     // Remove it from the map as well.
     WorklistMap.erase(It);
 
-    if (LoopHeaders.count(BB))
+    if (LoopHeaders.count(BB)) {
       LoopHeaders.erase(BB);
+      ClonedLoopHeaders.erase(BB);
+    }
   }
 
   bool simplifyBlocks();
@@ -1082,10 +1098,13 @@ bool SimplifyCFG::tryJumpThreading(BranchInst *BI) {
     return false;
 
   // Don't jump thread through a potential header - this can produce irreducible
-  // control flow. Still, we make an exception for switch_enum.
+  // control flow and lead to infinite loop peeling.
   bool DestIsLoopHeader = (LoopHeaders.count(DestBB) != 0);
   if (DestIsLoopHeader) {
-    if (!isa<SwitchEnumInst>(destTerminator))
+    // Make an exception for switch_enum, but only if its block was not already
+    // peeled out of its original loop. In that case, further jump threading
+    // can accomplish nothing, and the loop will be infinitely peeled.
+    if (!isa<SwitchEnumInst>(destTerminator) || ClonedLoopHeaders.count(DestBB))
       return false;
   }
 
@@ -1127,8 +1146,14 @@ bool SimplifyCFG::tryJumpThreading(BranchInst *BI) {
 
   // If we jump-thread a switch_enum in the loop header, we have to recalculate
   // the loop header info.
-  if (DestIsLoopHeader)
+  //
+  // FIXME: findLoopHeaders should not be called repeatedly during simplify-cfg
+  // iteration. It is a whole-function analysis! It also does nothing to help
+  // avoid infinite loop peeling.
+  if (DestIsLoopHeader) {
+    ClonedLoopHeaders.insert(Cloner.getNewBB());
     findLoopHeaders();
+  }
 
   ++NumJumpThreads;
   return true;
@@ -1367,8 +1392,7 @@ bool SimplifyCFG::simplifyBranchBlock(BranchInst *BI) {
     for (auto &Succ : remainingBlock->getSuccessors())
       addToWorklist(Succ);
 
-    if (LoopHeaders.count(deletedBlock))
-      LoopHeaders.insert(remainingBlock);
+    substitutedBlockPreds(deletedBlock, remainingBlock);
 
     auto Iter = JumpThreadingCost.find(deletedBlock);
     if (Iter != JumpThreadingCost.end()) {
@@ -1392,8 +1416,7 @@ bool SimplifyCFG::simplifyBranchBlock(BranchInst *BI) {
                                          trampolineDest.newSourceBranchArgs);
     // Eliminating the trampoline can expose opportunities to improve the
     // new block we branch to.
-    if (LoopHeaders.count(DestBB))
-      LoopHeaders.insert(BB);
+    substitutedBlockPreds(DestBB, trampolineDest.destBB);
 
     addToWorklist(trampolineDest.destBB);
     BI->eraseFromParent();
@@ -1578,8 +1601,7 @@ bool SimplifyCFG::simplifyCondBrBlock(CondBranchInst *BI) {
         BI->getTrueBBCount(), BI->getFalseBBCount());
     BI->eraseFromParent();
 
-    if (LoopHeaders.count(TrueSide))
-      LoopHeaders.insert(ThisBB);
+    substitutedBlockPreds(TrueSide, ThisBB);
     removeIfDead(TrueSide);
     addToWorklist(ThisBB);
     return true;
@@ -1597,8 +1619,7 @@ bool SimplifyCFG::simplifyCondBrBlock(CondBranchInst *BI) {
         falseTrampolineDest.destBB, falseTrampolineDest.newSourceBranchArgs,
         BI->getTrueBBCount(), BI->getFalseBBCount());
     BI->eraseFromParent();
-    if (LoopHeaders.count(FalseSide))
-      LoopHeaders.insert(ThisBB);
+    substitutedBlockPreds(FalseSide, ThisBB);
     removeIfDead(FalseSide);
     addToWorklist(ThisBB);
     return true;

@@ -7,6 +7,11 @@ sil_stage canonical
 import Builtin
 import Swift
 
+internal enum Enum { // Payload-free two-case enum used by @testInfinitePeeling.
+  case one
+  case two
+}
+
 // CHECK-LABEL: sil @simple_test : $@convention(thin) () -> () {
 // CHECK: bb0:
 // CHECK-NEXT: tuple
@@ -27,3 +32,64 @@ bb3:
   %9999 = tuple ()
   return %9999 : $()
 }
+
+// Test that SimplifyCFG::simplifyBlocks, tryJumpThreading does not
+// perform unbounded loop peeling.
+//
+// rdar://73357726 ([SR-14068]: Compiling with optimisation runs indefinitely for grpc-swift)
+// CHECK-LABEL: sil @testInfinitePeeling : $@convention(method) (Builtin.Int64, Enum) -> () {
+//
+// There is only one switch_enum block, and it is no longer in a loop.
+// CHECK: bb0(%0 : $Builtin.Int64, %1 : $Enum):
+// CHECK:   switch_enum %1 : $Enum, case #Enum.one!enumelt: bb3, case #Enum.two!enumelt: bb4
+// CHECK: bb1:
+// CHECK:   br bb8
+// CHECK: bb2:
+// CHECK:   br bb5(%{{.*}} : $Enum)
+//
+// This is the original cond_br block
+// CHECK: bb3:
+// CHECK:   cond_br %{{.*}}, bb2, bb1
+// CHECK: bb4:
+// CHECK:   br bb5(%1 : $Enum)
+//
+// This is the cond_br block after jump-threading.
+// CHECK: bb5(%{{.*}} : $Enum):
+// CHECK:   cond_br %{{.*}}, bb6, bb7
+// CHECK: bb6:
+// CHECK:   br bb5(%{{.*}} : $Enum)
+// CHECK: bb7:
+// CHECK:   br bb8
+// CHECK: bb8:
+// CHECK:   return %19 : $()
+// CHECK-LABEL: } // end sil function 'testInfinitePeeling'
+sil @testInfinitePeeling : $@convention(method) (Builtin.Int64, Enum) -> () {
+bb0(%0 : $Builtin.Int64, %1 : $Enum):
+  %2 = integer_literal $Builtin.Int64, 99999999
+  br bb1(%0 : $Builtin.Int64, %1 : $Enum)
+
+bb1(%4 : $Builtin.Int64, %5 : $Enum): // Preds: bb0 bb3 (loop header)
+  switch_enum %5 : $Enum, case #Enum.one!enumelt: bb4, default bb5
+
+bb2(%7 : $Builtin.Int64, %8 : $Enum): // Preds: bb4 bb5
+  %9 = builtin "cmp_slt_Int64"(%2 : $Builtin.Int64, %7 : $Builtin.Int64) : $Builtin.Int1
+  cond_br %9, bb3, bb6
+
+bb3: // Preds: bb2
+  br bb1(%7 : $Builtin.Int64, %8 : $Enum) // back edge to bb1
+
+bb4: // Preds: bb1
+  %12 = integer_literal $Builtin.Int64, 1
+  %13 = integer_literal $Builtin.Int1, -1
+  %14 = builtin "sadd_with_overflow_Int64"(%4 : $Builtin.Int64, %12 : $Builtin.Int64, %13 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
+  %15 = tuple_extract %14 : $(Builtin.Int64, Builtin.Int1), 0
+  %16 = enum $Enum, #Enum.two!enumelt // reaches the switch_enum in bb1 via bb2 and bb3
+  br bb2(%15 : $Builtin.Int64, %16 : $Enum)
+
+bb5: // Preds: bb1
+  br bb2(%2 : $Builtin.Int64, %5 : $Enum)
+
+bb6: // Preds: bb2
+  %19 = tuple ()
+  return %19 : $()
+}