swiftlang · meg-gupta · May 10, 2023 · May 2, 2023 · atrick · May 2, 2023
@@ -530,6 +530,18 @@ static SILValue getSub(SILLocation Loc, SILValue Val, unsigned SubVal,
   return B.createTupleExtract(Loc, AI, 0);
 }
 
+static SILValue getAdd(SILLocation Loc, SILValue Val, unsigned AddVal,
+                       SILBuilder &B) {
+  SmallVector<SILValue, 4> Args(1, Val);
+  Args.push_back(B.createIntegerLiteral(Loc, Val->getType(), AddVal));
+  Args.push_back(B.createIntegerLiteral(
+      Loc, SILType::getBuiltinIntegerType(1, B.getASTContext()), -1));
+
+  auto *AI = B.createBuiltinBinaryFunctionWithOverflow(
+      Loc, "sadd_with_overflow", Args);
+  return B.createTupleExtract(Loc, AI, 0);
+}
+
 /// A canonical induction variable incremented by one from Start to End-1.
 struct InductionInfo {
   SILArgument *HeaderVal;
@@ -552,12 +564,12 @@ struct InductionInfo {
 
   SILInstruction *getInstruction() { return Inc; }
 
-  SILValue getFirstValue() {
-    return Start;
+  SILValue getFirstValue(SILLocation &Loc, SILBuilder &B, unsigned AddVal) {
+    return AddVal != 0 ? getAdd(Loc, Start, AddVal, B) : Start;
   }
 
-  SILValue getLastValue(SILLocation &Loc, SILBuilder &B) {
-    return getSub(Loc, End, 1, B);
+  SILValue getLastValue(SILLocation &Loc, SILBuilder &B, unsigned SubVal) {
+    return SubVal != 0 ? getSub(Loc, End, SubVal, B) : End;
   }
 
   /// If necessary insert an overflow for this induction variable.
@@ -718,28 +730,62 @@ static bool isGuaranteedToBeExecuted(DominanceInfo *DT, SILBasicBlock *Block,
 /// induction variable.
 class AccessFunction {
   InductionInfo *Ind;
+  bool preIncrement;
+
+  AccessFunction(InductionInfo *I, bool isPreIncrement = false)
+      : Ind(I), preIncrement(isPreIncrement) {}
 
-  AccessFunction(InductionInfo *I) { Ind = I; }
 public:
 
   operator bool() { return Ind != nullptr; }
 
   static AccessFunction getLinearFunction(SILValue Idx,
                                           InductionAnalysis &IndVars) {
     // Match the actual induction variable buried in the integer struct.
-    // %2 = struct $Int(%1 : $Builtin.Word)
-    //    = apply %check_bounds(%array, %2) : $@convention(thin) (Int, ArrayInt) -> ()
+    // bb(%ivar)
+    // %2 = struct $Int(%ivar : $Builtin.Word)
+    //    = apply %check_bounds(%array, %2) :
+    // or
+    // bb(%ivar1)
+    // %ivar2 = builtin "sadd_with_overflow_Int64"(%ivar1,...)
+    // %t = tuple_extract %ivar2
+    // %s = struct $Int(%t : $Builtin.Word)
+    //    = apply %check_bounds(%array, %s) :
+
+    bool preIncrement = false;
+
     auto ArrayIndexStruct = dyn_cast<StructInst>(Idx);
     if (!ArrayIndexStruct)
       return nullptr;
 
     auto AsArg =
         dyn_cast<SILArgument>(ArrayIndexStruct->getElements()[0]);
-    if (!AsArg)
-      return nullptr;
+
+    if (!AsArg) {
+      auto *TupleExtract =
+          dyn_cast<TupleExtractInst>(ArrayIndexStruct->getElements()[0]);
+
+      if (!TupleExtract) {
+        return nullptr;
+      }
+
+      auto *Builtin = dyn_cast<BuiltinInst>(TupleExtract->getOperand());
+      if (!Builtin || Builtin->getBuiltinKind() != BuiltinValueKind::SAddOver) {
+        return nullptr;
+      }
+
+      // We don't check if the second argument to the builtin is loop invariant
+      // here, because only induction variables with a +1 incremenent are
+      // considered for bounds check optimization.
+      AsArg = dyn_cast<SILArgument>(Builtin->getArguments()[0]);
+      if (!AsArg) {
+        return nullptr;
+      }
+      preIncrement = true;
+    }
 
     if (auto *Ind = IndVars[AsArg])
-      return AccessFunction(Ind);
+      return AccessFunction(Ind, preIncrement);
 
     return nullptr;
   }
@@ -759,7 +805,7 @@ class AccessFunction {
     SILBuilderWithScope Builder(Preheader->getTerminator(), AI);
 
     // Get the first induction value.
-    auto FirstVal = Ind->getFirstValue();
+    auto FirstVal = Ind->getFirstValue(Loc, Builder, preIncrement ? 1 : 0);
     // Clone the struct for the start index.
     auto Start = cast<SingleValueInstruction>(CheckToHoist.getIndex())
                      ->clone(Preheader->getTerminator());
@@ -771,7 +817,7 @@ class AccessFunction {
     NewCheck->setOperand(1, Start);
 
     // Get the last induction value.
-    auto LastVal = Ind->getLastValue(Loc, Builder);
+    auto LastVal = Ind->getLastValue(Loc, Builder, preIncrement ? 0 : 1);
     // Clone the struct for the end index.
     auto End = cast<SingleValueInstruction>(CheckToHoist.getIndex())
                    ->clone(Preheader->getTerminator());

@@ -1096,6 +1096,192 @@ bb4(%31 : $Builtin.Int32):
   return %32 : $Int32
 }
 
+// SIL pattern for :
+// func hoist_new_ind_pattern1(_ a : [Int]) {
+//   for i in 0...4 {
+//      ...a[i]...
+//   }
+// }
+// Bounds check for the array should be hoisted out of the loop
+//
+// RANGECHECK-LABEL: sil @hoist_new_ind_pattern1 :
+// RANGECHECK: [[CB:%.*]] = function_ref @checkbounds : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+// RANGECHECK: [[MINUS1:%.*]] = integer_literal $Builtin.Int1, -1
+// RANGECHECK: [[TRUE:%.*]] = struct $Bool ([[MINUS1]] : $Builtin.Int1)
+// RANGECHECK: [[ZERO:%.*]] = integer_literal $Builtin.Int32, 0
+// RANGECHECK: [[INTZERO:%.*]] = struct $Int32 ([[ZERO]] : $Builtin.Int32)
+// RANGECHECK: [[FOUR:%.*]] = integer_literal $Builtin.Int32, 4
+// RANGECHECK: [[ONE:%.*]] = integer_literal $Builtin.Int32, 1
+// RANGECHECK: [[ANOTHERTRUE:%.*]] = integer_literal $Builtin.Int1, -1
+// RANGECHECK: [[INTONEADD:%.*]] = builtin "sadd_with_overflow_Int32"([[ZERO]] : $Builtin.Int32, [[ONE]] : $Builtin.Int32, [[ANOTHERTRUE]] : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1)
+// RANGECHECK: [[INTONEEX:%.*]] = tuple_extract [[INTONEADD]] : $(Builtin.Int32, Builtin.Int1), 0
+// RANGECHECK: [[INTONE:%.*]] = struct $Int32 ([[INTONEEX]] : $Builtin.Int32)
+// RANGECHECK: [[A1:%.*]] = apply [[CB]]([[INTONE]], [[TRUE]], %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+// RANGECHECK: [[INTFOUR:%.*]] = struct $Int32 ([[FOUR]] : $Builtin.Int32)
+// RANGECHECK: [[A2:%.*]] = apply [[CB]]([[INTFOUR]], [[TRUE]], %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+// RANGECHECK: bb1
+// RANGECHECK-NOT: apply [[CB]]
+// RANGECHECK-LABEL: } // end sil function 'hoist_new_ind_pattern1'
+sil @hoist_new_ind_pattern1 : $@convention(thin) (@owned ArrayInt) -> () {
+bb0(%0 : $ArrayInt):
+  %minus1 = integer_literal $Builtin.Int1, -1
+  %true = struct $Bool(%minus1 : $Builtin.Int1)
+  %zero = integer_literal $Builtin.Int32, 0
+  %int0 = struct $Int32 (%zero : $Builtin.Int32)
+  %one = integer_literal $Builtin.Int32, 1
+  %four = integer_literal $Builtin.Int32, 4
+  %intfour = struct $Int32 (%four : $Builtin.Int32)
+  %cb = function_ref @checkbounds : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+  apply %cb(%int0, %true, %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+  br bb1(%zero : $Builtin.Int32)
+
+bb1(%4 : $Builtin.Int32):
+  %5 = builtin "sadd_with_overflow_Int32"(%4 : $Builtin.Int32, %one : $Builtin.Int32, %minus1 : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1)
+  %6 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 0
+  %7 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 1
+  %8 = struct $Int32 (%6 : $Builtin.Int32)
+  %9 = builtin "cmp_eq_Int32"(%6 : $Builtin.Int32, %four : $Builtin.Int32) : $Builtin.Int1
+  apply %cb(%8, %true, %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+  cond_br %9, bb3, bb2
+
+bb2:
+  br bb1(%6 : $Builtin.Int32)
+
+bb3:
+  %t = tuple ()
+  return %t : $()
+}
+
+// Currently this is not optimized because the induction var increment is 2
+// Support for this can be added by updating induction variable analysis
+// RANGECHECK-LABEL: sil @hoist_new_ind_pattern2 :
+// RANGECHECK: [[CB:%.*]] = function_ref @checkbounds : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+// RANGECHECK: bb1
+// RANGECHECK: apply [[CB]]
+// RANGECHECK-LABEL: } // end sil function 'hoist_new_ind_pattern2'
+sil @hoist_new_ind_pattern2 : $@convention(thin) (@owned ArrayInt) -> () {
+bb0(%0 : $ArrayInt):
+  %minus1 = integer_literal $Builtin.Int1, -1
+  %true = struct $Bool(%minus1 : $Builtin.Int1)
+  %zero = integer_literal $Builtin.Int32, 0
+  %int0 = struct $Int32 (%zero : $Builtin.Int32)
+  %two = integer_literal $Builtin.Int32, 2
+  %four = integer_literal $Builtin.Int32, 4
+  %intfour = struct $Int32 (%four : $Builtin.Int32)
+  %cb = function_ref @checkbounds : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+  apply %cb(%int0, %true, %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+  br bb1(%zero : $Builtin.Int32)
+
+bb1(%4 : $Builtin.Int32):
+  %5 = builtin "sadd_with_overflow_Int32"(%4 : $Builtin.Int32, %two : $Builtin.Int32, %minus1 : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1)
+  %6 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 0
+  %7 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 1
+  cond_fail %7 : $Builtin.Int1
+  %8 = struct $Int32 (%6 : $Builtin.Int32)
+  %9 = builtin "cmp_eq_Int32"(%6 : $Builtin.Int32, %four : $Builtin.Int32) : $Builtin.Int1
+  apply %cb(%8, %true, %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+  cond_br %9, bb3, bb2
+
+bb2:
+  br bb1(%6 : $Builtin.Int32)
+
+bb3:
+  %t = tuple ()
+  return %t : $()
+}
+
+// This is currently not optimized because access function is not recognized
+// SIL pattern for :
+//  for var index in 0...24
+//  {
+//     ...a[index + index]...
+//  }
+//
+// RANGECHECK-LABEL: sil @hoist_new_ind_pattern3 :
+// RANGECHECK: [[CB:%.*]] = function_ref @checkbounds : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+// RANGECHECK: bb1
+// RANGECHECK: apply [[CB]]
+// RANGECHECK-LABEL: } // end sil function 'hoist_new_ind_pattern3'
+sil @hoist_new_ind_pattern3 : $@convention(thin) (@owned ArrayInt) -> () {
+bb0(%0 : $ArrayInt):
+  %minus1 = integer_literal $Builtin.Int1, -1
+  %true = struct $Bool(%minus1 : $Builtin.Int1)
+  %zero = integer_literal $Builtin.Int32, 0
+  %int0 = struct $Int32 (%zero : $Builtin.Int32)
+  %one = integer_literal $Builtin.Int32, 1
+  %four = integer_literal $Builtin.Int32, 4
+  %intfour = struct $Int32 (%four : $Builtin.Int32)
+  %cb = function_ref @checkbounds : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+  apply %cb(%int0, %true, %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+  br bb1(%zero : $Builtin.Int32)
+
+bb1(%4 : $Builtin.Int32):
+  %5 = builtin "sadd_with_overflow_Int32"(%4 : $Builtin.Int32, %one : $Builtin.Int32, %minus1 : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1)
+  %6 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 0
+  %7 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 1
+  %8 = struct $Int32 (%6 : $Builtin.Int32)
+  %9 = builtin "cmp_eq_Int32"(%6 : $Builtin.Int32, %four : $Builtin.Int32) : $Builtin.Int1
+  %10 = builtin "sadd_with_overflow_Int32"(%6 : $Builtin.Int32, %6 : $Builtin.Int32, %minus1 : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1)
+  %11 = tuple_extract %10 : $(Builtin.Int32, Builtin.Int1), 0
+  %12 = tuple_extract %10 : $(Builtin.Int32, Builtin.Int1), 1
+  %13 = struct $Int32 (%11 : $Builtin.Int32)
+  apply %cb(%13, %true, %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
+  cond_br %9, bb3, bb2
+
+bb2:
+  br bb1(%6 : $Builtin.Int32)
+
+bb3:
+  %t = tuple ()
+  return %t : $()
+}
+
+
+// RANGECHECK-LABEL: sil @hoist_new_ind_pattern4 :
+// RANGECHECK: [[MINUS1:%.*]] = integer_literal $Builtin.Int1, -1
+// RANGECHECK: [[TRUE:%.*]] = struct $Bool ([[MINUS1]] : $Builtin.Int1)
+// RANGECHECK: [[ZERO:%.*]] = integer_literal $Builtin.Int32, 0
+// RANGECHECK: [[INTZERO:%.*]] = struct $Int32 ([[ZERO]] : $Builtin.Int32)
+// RANGECHECK: [[CB:%.*]] = function_ref @checkbounds2 : $@convention(method) (Int32, Bool, @owned Array<Int>) -> _DependenceToken
+// RANGECHECK: apply [[CB]]([[INTZERO]], [[TRUE]], %0) : $@convention(method) (Int32, Bool, @owned Array<Int>) -> _DependenceToken
+// RANGECHECK-NOT: apply [[CB]]
+// RANGECHECK-LABEL: } // end sil function 'hoist_new_ind_pattern4'
+//   for i in 0..<a.count
+//   {
+//      ...a[i]...
+//   }
+sil @hoist_new_ind_pattern4 : $@convention(thin) (@owned Array<Int>) -> () {
+bb0(%0 : $Array<Int>):
+  %minus1 = integer_literal $Builtin.Int1, -1
+  %true = struct $Bool(%minus1 : $Builtin.Int1)
+  %zero = integer_literal $Builtin.Int32, 0
+  %int0 = struct $Int32 (%zero : $Builtin.Int32)
+  %one = integer_literal $Builtin.Int32, 1
+  %f1 = function_ref @getCount2 : $@convention(method) (@owned Array<Int>) -> Int32
+  %t1 = apply %f1(%0) : $@convention(method) (@owned Array<Int>) -> Int32
+  %count = struct_extract %t1 : $Int32, #Int32._value
+  %cb = function_ref @checkbounds2 : $@convention(method) (Int32, Bool, @owned Array<Int>) -> _DependenceToken
+  apply %cb(%int0, %true, %0) : $@convention(method) (Int32, Bool, @owned Array<Int>) -> _DependenceToken
+  br bb1(%zero : $Builtin.Int32)
+
+bb1(%4 : $Builtin.Int32):
+  %5 = builtin "sadd_with_overflow_Int32"(%4 : $Builtin.Int32, %one : $Builtin.Int32, %minus1 : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1)
+  %6 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 0
+  %7 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 1
+  cond_fail %7 : $Builtin.Int1
+  %8 = struct $Int32 (%6 : $Builtin.Int32)
+  apply %cb(%8, %true, %0) : $@convention(method) (Int32, Bool, @owned Array<Int>) -> _DependenceToken
+  %9 = builtin "cmp_eq_Int32"(%6 : $Builtin.Int32, %count : $Builtin.Int32) : $Builtin.Int1
+  cond_br %9, bb3, bb2
+
+bb2:
+  br bb1(%6 : $Builtin.Int32)
+
+bb3:
+  %t = tuple ()
+  return %t : $()
+}
+
 sil public_external [_semantics "array.check_subscript"] @checkbounds_no_meth : $@convention(thin) (Int32, Bool, @owned ArrayInt) -> _DependenceToken {
   bb0(%0: $Int32, %1: $Bool, %2: $ArrayInt):
     unreachable