Skip to content

Handle a gap while matching SIL patterns for hoisting array bound checks #65573

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 58 additions & 12 deletions lib/SILOptimizer/LoopTransforms/ArrayBoundsCheckOpts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,18 @@ static SILValue getSub(SILLocation Loc, SILValue Val, unsigned SubVal,
return B.createTupleExtract(Loc, AI, 0);
}

static SILValue getAdd(SILLocation Loc, SILValue Val, unsigned AddVal,
SILBuilder &B) {
SmallVector<SILValue, 4> Args(1, Val);
Args.push_back(B.createIntegerLiteral(Loc, Val->getType(), AddVal));
Args.push_back(B.createIntegerLiteral(
Loc, SILType::getBuiltinIntegerType(1, B.getASTContext()), -1));

auto *AI = B.createBuiltinBinaryFunctionWithOverflow(
Loc, "sadd_with_overflow", Args);
return B.createTupleExtract(Loc, AI, 0);
}

/// A canonical induction variable incremented by one from Start to End-1.
struct InductionInfo {
SILArgument *HeaderVal;
Expand All @@ -552,12 +564,12 @@ struct InductionInfo {

SILInstruction *getInstruction() { return Inc; }

SILValue getFirstValue() {
return Start;
SILValue getFirstValue(SILLocation &Loc, SILBuilder &B, unsigned AddVal) {
return AddVal != 0 ? getAdd(Loc, Start, AddVal, B) : Start;
}

SILValue getLastValue(SILLocation &Loc, SILBuilder &B) {
return getSub(Loc, End, 1, B);
SILValue getLastValue(SILLocation &Loc, SILBuilder &B, unsigned SubVal) {
return SubVal != 0 ? getSub(Loc, End, SubVal, B) : End;
}

/// If necessary insert an overflow for this induction variable.
Expand Down Expand Up @@ -718,28 +730,62 @@ static bool isGuaranteedToBeExecuted(DominanceInfo *DT, SILBasicBlock *Block,
/// induction variable.
class AccessFunction {
InductionInfo *Ind;
bool preIncrement;

AccessFunction(InductionInfo *I, bool isPreIncrement = false)
: Ind(I), preIncrement(isPreIncrement) {}

AccessFunction(InductionInfo *I) { Ind = I; }
public:

operator bool() { return Ind != nullptr; }

static AccessFunction getLinearFunction(SILValue Idx,
InductionAnalysis &IndVars) {
// Match the actual induction variable buried in the integer struct.
// %2 = struct $Int(%1 : $Builtin.Word)
// = apply %check_bounds(%array, %2) : $@convention(thin) (Int, ArrayInt) -> ()
// bb(%ivar)
// %2 = struct $Int(%ivar : $Builtin.Word)
// = apply %check_bounds(%array, %2) :
// or
// bb(%ivar1)
// %ivar2 = builtin "sadd_with_overflow_Int64"(%ivar1,...)
// %t = tuple_extract %ivar2
// %s = struct $Int(%t : $Builtin.Word)
// = apply %check_bounds(%array, %s) :

bool preIncrement = false;

auto ArrayIndexStruct = dyn_cast<StructInst>(Idx);
if (!ArrayIndexStruct)
return nullptr;

auto AsArg =
dyn_cast<SILArgument>(ArrayIndexStruct->getElements()[0]);
if (!AsArg)
return nullptr;

if (!AsArg) {
auto *TupleExtract =
dyn_cast<TupleExtractInst>(ArrayIndexStruct->getElements()[0]);

if (!TupleExtract) {
return nullptr;
}

auto *Builtin = dyn_cast<BuiltinInst>(TupleExtract->getOperand());
if (!Builtin || Builtin->getBuiltinKind() != BuiltinValueKind::SAddOver) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It isn't clear to me whether the meaning of getLinearFunction changes depending on whether the bounds check is on the preincremented or postincremented induction variable. I think that needs to be specified. I suggest testing the boundary conditions to be sure there is no off-by-one bug.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, there is a off-by-one bug. So far, the AccessFunction was the identity function. But with your change it can add the offset of the increment. You need to add that info to AccessFunction.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I'll fix this

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also add a check in your tests for this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eeckstein I added check lines with the bounds check now

return nullptr;
}

// We don't check if the second argument to the builtin is loop invariant
// here, because only induction variables with a +1 incremenent are
// considered for bounds check optimization.
AsArg = dyn_cast<SILArgument>(Builtin->getArguments()[0]);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't you need to check if the second argument is a loop-invariant constant?
And handle that in isZeroCount and hoistCheckToPreheader?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought it was okay because InductionAnalysis bails out for values that are not integer constant 1 ? https://github.com/apple/swift/blob/dd813af0a7670f420d30aa95326863fbb772463a/lib/SILOptimizer/LoopTransforms/ArrayBoundsCheckOpts.cpp#L650

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll write some tests for this case.

Copy link
Contributor

@eeckstein eeckstein May 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, the dyn_cast<SILArgument> makes sure that the add-builtin is matching the increment of the induction variable. Can you add a comment which makes that clear?

Alternatively, you can replace that argument check with a constant-check of the second argument (as I suggested). Then the optimization could also handle loops like

for i in 0..<10 {
  a[i + c] // c is a constant
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eeckstein I'll add the increment by other constants in a different PR. There are other unhandled patterns as well. Eg: Local array patterns

if (!AsArg) {
return nullptr;
}
preIncrement = true;
}

if (auto *Ind = IndVars[AsArg])
return AccessFunction(Ind);
return AccessFunction(Ind, preIncrement);

return nullptr;
}
Expand All @@ -759,7 +805,7 @@ class AccessFunction {
SILBuilderWithScope Builder(Preheader->getTerminator(), AI);

// Get the first induction value.
auto FirstVal = Ind->getFirstValue();
auto FirstVal = Ind->getFirstValue(Loc, Builder, preIncrement ? 1 : 0);
// Clone the struct for the start index.
auto Start = cast<SingleValueInstruction>(CheckToHoist.getIndex())
->clone(Preheader->getTerminator());
Expand All @@ -771,7 +817,7 @@ class AccessFunction {
NewCheck->setOperand(1, Start);

// Get the last induction value.
auto LastVal = Ind->getLastValue(Loc, Builder);
auto LastVal = Ind->getLastValue(Loc, Builder, preIncrement ? 0 : 1);
// Clone the struct for the end index.
auto End = cast<SingleValueInstruction>(CheckToHoist.getIndex())
->clone(Preheader->getTerminator());
Expand Down
186 changes: 186 additions & 0 deletions test/SILOptimizer/abcopts.sil
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,192 @@ bb4(%31 : $Builtin.Int32):
return %32 : $Int32
}

// SIL pattern for :
// func hoist_new_ind_pattern1(_ a : [Int]) {
// for i in 0...4 {
// ...a[i]...
// }
// }
// Bounds check for the array should be hoisted out of the loop
//
// RANGECHECK-LABEL: sil @hoist_new_ind_pattern1 :
// RANGECHECK: [[CB:%.*]] = function_ref @checkbounds : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
// RANGECHECK: [[MINUS1:%.*]] = integer_literal $Builtin.Int1, -1
// RANGECHECK: [[TRUE:%.*]] = struct $Bool ([[MINUS1]] : $Builtin.Int1)
// RANGECHECK: [[ZERO:%.*]] = integer_literal $Builtin.Int32, 0
// RANGECHECK: [[INTZERO:%.*]] = struct $Int32 ([[ZERO]] : $Builtin.Int32)
// RANGECHECK: [[FOUR:%.*]] = integer_literal $Builtin.Int32, 4
// RANGECHECK: [[ONE:%.*]] = integer_literal $Builtin.Int32, 1
// RANGECHECK: [[ANOTHERTRUE:%.*]] = integer_literal $Builtin.Int1, -1
// RANGECHECK: [[INTONEADD:%.*]] = builtin "sadd_with_overflow_Int32"([[ZERO]] : $Builtin.Int32, [[ONE]] : $Builtin.Int32, [[ANOTHERTRUE]] : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1)
// RANGECHECK: [[INTONEEX:%.*]] = tuple_extract [[INTONEADD]] : $(Builtin.Int32, Builtin.Int1), 0
// RANGECHECK: [[INTONE:%.*]] = struct $Int32 ([[INTONEEX]] : $Builtin.Int32)
// RANGECHECK: [[A1:%.*]] = apply [[CB]]([[INTONE]], [[TRUE]], %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
// RANGECHECK: [[INTFOUR:%.*]] = struct $Int32 ([[FOUR]] : $Builtin.Int32)
// RANGECHECK: [[A2:%.*]] = apply [[CB]]([[INTFOUR]], [[TRUE]], %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
// RANGECHECK: bb1
// RANGECHECK-NOT: apply [[CB]]
// RANGECHECK-LABEL: } // end sil function 'hoist_new_ind_pattern1'
sil @hoist_new_ind_pattern1 : $@convention(thin) (@owned ArrayInt) -> () {
bb0(%0 : $ArrayInt):
%minus1 = integer_literal $Builtin.Int1, -1
%true = struct $Bool(%minus1 : $Builtin.Int1)
%zero = integer_literal $Builtin.Int32, 0
%int0 = struct $Int32 (%zero : $Builtin.Int32)
%one = integer_literal $Builtin.Int32, 1
%four = integer_literal $Builtin.Int32, 4
%intfour = struct $Int32 (%four : $Builtin.Int32)
%cb = function_ref @checkbounds : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
apply %cb(%int0, %true, %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
br bb1(%zero : $Builtin.Int32)

bb1(%4 : $Builtin.Int32):
%5 = builtin "sadd_with_overflow_Int32"(%4 : $Builtin.Int32, %one : $Builtin.Int32, %minus1 : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1)
%6 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 0
%7 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 1
%8 = struct $Int32 (%6 : $Builtin.Int32)
%9 = builtin "cmp_eq_Int32"(%6 : $Builtin.Int32, %four : $Builtin.Int32) : $Builtin.Int1
apply %cb(%8, %true, %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
cond_br %9, bb3, bb2

bb2:
br bb1(%6 : $Builtin.Int32)

bb3:
%t = tuple ()
return %t : $()
}

// Currently this is not optimized because the induction var increment is 2
// Support for this can be added by updating induction variable analysis
// RANGECHECK-LABEL: sil @hoist_new_ind_pattern2 :
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add the pseudo-code for hoist_new_ind_pattern2 so we can see what's being tested and add comments on why incrementing by two prevents bounds check elimination?

// RANGECHECK: [[CB:%.*]] = function_ref @checkbounds : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
// RANGECHECK: bb1
// RANGECHECK: apply [[CB]]
// RANGECHECK-LABEL: } // end sil function 'hoist_new_ind_pattern2'
sil @hoist_new_ind_pattern2 : $@convention(thin) (@owned ArrayInt) -> () {
bb0(%0 : $ArrayInt):
%minus1 = integer_literal $Builtin.Int1, -1
%true = struct $Bool(%minus1 : $Builtin.Int1)
%zero = integer_literal $Builtin.Int32, 0
%int0 = struct $Int32 (%zero : $Builtin.Int32)
%two = integer_literal $Builtin.Int32, 2
%four = integer_literal $Builtin.Int32, 4
%intfour = struct $Int32 (%four : $Builtin.Int32)
%cb = function_ref @checkbounds : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
apply %cb(%int0, %true, %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
br bb1(%zero : $Builtin.Int32)

bb1(%4 : $Builtin.Int32):
%5 = builtin "sadd_with_overflow_Int32"(%4 : $Builtin.Int32, %two : $Builtin.Int32, %minus1 : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1)
%6 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 0
%7 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 1
cond_fail %7 : $Builtin.Int1
%8 = struct $Int32 (%6 : $Builtin.Int32)
%9 = builtin "cmp_eq_Int32"(%6 : $Builtin.Int32, %four : $Builtin.Int32) : $Builtin.Int1
apply %cb(%8, %true, %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
cond_br %9, bb3, bb2

bb2:
br bb1(%6 : $Builtin.Int32)

bb3:
%t = tuple ()
return %t : $()
}

// This is currently not optimized because access function is not recognized
// SIL pattern for :
// for var index in 0...24
// {
// ...a[index + index]...
// }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a comment on why bounds check hoisting fails for hoist_new_ind_pattern3?

//
// RANGECHECK-LABEL: sil @hoist_new_ind_pattern3 :
// RANGECHECK: [[CB:%.*]] = function_ref @checkbounds : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
// RANGECHECK: bb1
// RANGECHECK: apply [[CB]]
// RANGECHECK-LABEL: } // end sil function 'hoist_new_ind_pattern3'
sil @hoist_new_ind_pattern3 : $@convention(thin) (@owned ArrayInt) -> () {
bb0(%0 : $ArrayInt):
%minus1 = integer_literal $Builtin.Int1, -1
%true = struct $Bool(%minus1 : $Builtin.Int1)
%zero = integer_literal $Builtin.Int32, 0
%int0 = struct $Int32 (%zero : $Builtin.Int32)
%one = integer_literal $Builtin.Int32, 1
%four = integer_literal $Builtin.Int32, 4
%intfour = struct $Int32 (%four : $Builtin.Int32)
%cb = function_ref @checkbounds : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
apply %cb(%int0, %true, %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
br bb1(%zero : $Builtin.Int32)

bb1(%4 : $Builtin.Int32):
%5 = builtin "sadd_with_overflow_Int32"(%4 : $Builtin.Int32, %one : $Builtin.Int32, %minus1 : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1)
%6 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 0
%7 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 1
%8 = struct $Int32 (%6 : $Builtin.Int32)
%9 = builtin "cmp_eq_Int32"(%6 : $Builtin.Int32, %four : $Builtin.Int32) : $Builtin.Int1
%10 = builtin "sadd_with_overflow_Int32"(%6 : $Builtin.Int32, %6 : $Builtin.Int32, %minus1 : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1)
%11 = tuple_extract %10 : $(Builtin.Int32, Builtin.Int1), 0
%12 = tuple_extract %10 : $(Builtin.Int32, Builtin.Int1), 1
%13 = struct $Int32 (%11 : $Builtin.Int32)
apply %cb(%13, %true, %0) : $@convention(method) (Int32, Bool, @owned ArrayInt) -> _DependenceToken
cond_br %9, bb3, bb2

bb2:
br bb1(%6 : $Builtin.Int32)

bb3:
%t = tuple ()
return %t : $()
}


// RANGECHECK-LABEL: sil @hoist_new_ind_pattern4 :
// RANGECHECK: [[MINUS1:%.*]] = integer_literal $Builtin.Int1, -1
// RANGECHECK: [[TRUE:%.*]] = struct $Bool ([[MINUS1]] : $Builtin.Int1)
// RANGECHECK: [[ZERO:%.*]] = integer_literal $Builtin.Int32, 0
// RANGECHECK: [[INTZERO:%.*]] = struct $Int32 ([[ZERO]] : $Builtin.Int32)
// RANGECHECK: [[CB:%.*]] = function_ref @checkbounds2 : $@convention(method) (Int32, Bool, @owned Array<Int>) -> _DependenceToken
// RANGECHECK: apply [[CB]]([[INTZERO]], [[TRUE]], %0) : $@convention(method) (Int32, Bool, @owned Array<Int>) -> _DependenceToken
// RANGECHECK-NOT: apply [[CB]]
// RANGECHECK-LABEL: } // end sil function 'hoist_new_ind_pattern4'
// for i in 0..<a.count
// {
// ...a[i]...
// }
sil @hoist_new_ind_pattern4 : $@convention(thin) (@owned Array<Int>) -> () {
bb0(%0 : $Array<Int>):
%minus1 = integer_literal $Builtin.Int1, -1
%true = struct $Bool(%minus1 : $Builtin.Int1)
%zero = integer_literal $Builtin.Int32, 0
%int0 = struct $Int32 (%zero : $Builtin.Int32)
%one = integer_literal $Builtin.Int32, 1
%f1 = function_ref @getCount2 : $@convention(method) (@owned Array<Int>) -> Int32
%t1 = apply %f1(%0) : $@convention(method) (@owned Array<Int>) -> Int32
%count = struct_extract %t1 : $Int32, #Int32._value
%cb = function_ref @checkbounds2 : $@convention(method) (Int32, Bool, @owned Array<Int>) -> _DependenceToken
apply %cb(%int0, %true, %0) : $@convention(method) (Int32, Bool, @owned Array<Int>) -> _DependenceToken
br bb1(%zero : $Builtin.Int32)

bb1(%4 : $Builtin.Int32):
%5 = builtin "sadd_with_overflow_Int32"(%4 : $Builtin.Int32, %one : $Builtin.Int32, %minus1 : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1)
%6 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 0
%7 = tuple_extract %5 : $(Builtin.Int32, Builtin.Int1), 1
cond_fail %7 : $Builtin.Int1
%8 = struct $Int32 (%6 : $Builtin.Int32)
apply %cb(%8, %true, %0) : $@convention(method) (Int32, Bool, @owned Array<Int>) -> _DependenceToken
%9 = builtin "cmp_eq_Int32"(%6 : $Builtin.Int32, %count : $Builtin.Int32) : $Builtin.Int1
cond_br %9, bb3, bb2

bb2:
br bb1(%6 : $Builtin.Int32)

bb3:
%t = tuple ()
return %t : $()
}

sil public_external [_semantics "array.check_subscript"] @checkbounds_no_meth : $@convention(thin) (Int32, Bool, @owned ArrayInt) -> _DependenceToken {
bb0(%0: $Int32, %1: $Bool, %2: $ArrayInt):
unreachable
Expand Down