Skip to content

Avoid generating address phis in LoopUnroll #61051

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions lib/SIL/Utils/LoopInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "swift/SIL/Dominance.h"
#include "swift/SIL/SILFunction.h"
#include "swift/SIL/CFG.h"
#include "swift/SILOptimizer/Utils/BasicBlockOptUtils.h"
#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/Support/Debug.h"

Expand All @@ -36,6 +37,17 @@ SILLoopInfo::SILLoopInfo(SILFunction *F, DominanceInfo *DT) : Dominance(DT) {
}

bool SILLoop::canDuplicate(SILInstruction *I) const {
SinkAddressProjections sinkProj;
for (auto res : I->getResults()) {
if (!res->getType().isAddress()) {
continue;
}
auto canSink = sinkProj.analyzeAddressProjections(I);
if (!canSink) {
return false;
}
}

// The deallocation of a stack allocation must be in the loop, otherwise the
// deallocation will be fed by a phi node of two allocations.
if (I->isAllocatingStack()) {
Expand Down
20 changes: 1 addition & 19 deletions lib/SIL/Verifier/SILVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -985,24 +985,6 @@ class SILVerifier : public SILVerifierBase<SILVerifier> {
return InstNumbers[a] < InstNumbers[b];
}

// FIXME: For sanity, address-type phis should be prohibited at all SIL
// stages. However, the optimizer currently breaks the invariant in three
// places:
// 1. Normal Simplify CFG during conditional branch simplification
// (sneaky jump threading).
// 2. Simplify CFG via Jump Threading.
// 3. Loop Rotation.
//
// BasicBlockCloner::canCloneInstruction and sinkAddressProjections is
// designed to avoid this issue, we just need to make sure all passes use it
// correctly.
//
// Minimally, we must prevent address-type phis as long as access markers are
// preserved. A goal is to preserve access markers in OSSA.
bool prohibitAddressPhis() {
return F.hasOwnership();
}

void visitSILPhiArgument(SILPhiArgument *arg) {
// Verify that the `isPhiArgument` property is sound:
// - Phi arguments come from branches.
Expand All @@ -1026,7 +1008,7 @@ class SILVerifier : public SILVerifierBase<SILVerifier> {
"All phi argument inputs must be from branches.");
}
}
if (arg->isPhi() && prohibitAddressPhis()) {
if (arg->isPhi()) {
// As a property of well-formed SIL, we disallow address-type
// phis. Supporting them would prevent reliably reasoning about the
// underlying storage of memory access. This reasoning is important for
Expand Down
26 changes: 26 additions & 0 deletions lib/SILOptimizer/LoopTransforms/LoopUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "swift/SILOptimizer/Analysis/LoopAnalysis.h"
#include "swift/SILOptimizer/PassManager/Passes.h"
#include "swift/SILOptimizer/PassManager/Transforms.h"
#include "swift/SILOptimizer/Utils/BasicBlockOptUtils.h"
#include "swift/SILOptimizer/Utils/PerformanceInlinerUtils.h"
#include "swift/SILOptimizer/Utils/SILInliner.h"
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
Expand Down Expand Up @@ -48,6 +49,8 @@ class LoopCloner : public SILCloner<LoopCloner> {
/// Clone the basic blocks in the loop.
void cloneLoop();

void sinkAddressProjections();

// Update SSA helper.
void collectLoopLiveOutValues(
DenseMap<SILValue, SmallVector<SILValue, 8>> &LoopLiveOutValues);
Expand All @@ -69,10 +72,33 @@ class LoopCloner : public SILCloner<LoopCloner> {

} // end anonymous namespace

/// Sink address projections that have uses outside the loop down to those
/// uses, so that cloning the loop blocks for unrolling never needs to create
/// an address-type phi argument (address phis are prohibited in well-formed
/// SIL; see the verifier check on SILPhiArgument).
void LoopCloner::sinkAddressProjections() {
  SinkAddressProjections sinkProj;
  for (auto *bb : Loop->getBlocks()) {
    for (auto &inst : *bb) {
      for (auto res : inst.getResults()) {
        // Only address-type results can give rise to address phis.
        if (!res->getType().isAddress()) {
          continue;
        }
        for (auto use : res->getUses()) {
          auto *user = use->getUser();
          if (Loop->contains(user)) {
            continue;
          }
          // This address result escapes the loop. Clone its projections to
          // the out-of-loop use points. The analysis cannot fail here:
          // SILLoop::canDuplicate already rejected loops containing
          // instructions whose address projections are not sinkable.
          bool canSink = sinkProj.analyzeAddressProjections(&inst);
          assert(canSink);
          (void)canSink; // Avoid an unused-variable warning in NDEBUG builds.
          sinkProj.cloneProjections();
          // cloneProjections() rewrote all out-of-loop uses of this result,
          // mutating the use list we are iterating; analyzing once per
          // instruction is sufficient, so stop scanning the remaining uses.
          break;
        }
      }
    }
  }
}

void LoopCloner::cloneLoop() {
SmallVector<SILBasicBlock *, 16> ExitBlocks;
Loop->getExitBlocks(ExitBlocks);

sinkAddressProjections();
// Clone the entire loop.
cloneReachableBlocks(Loop->getHeader(), ExitBlocks,
/*insertAfter*/Loop->getLoopLatch());
Expand Down
85 changes: 77 additions & 8 deletions test/SILOptimizer/loop_unroll_ossa.sil
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,22 @@ struct UInt64 {
var value: Builtin.Int64
}

class Klass {
var id: Int64
}

struct WrapperStruct {
var val: Klass
}

sil [_semantics "array.check_subscript"] @checkbounds : $@convention(method) (Int64, Bool, @guaranteed Array<Klass>) -> _DependenceToken

sil [_semantics "array.get_element"] @getElement : $@convention(method) (Int64, Bool, _DependenceToken, @guaranteed Array<Klass>) -> @owned Klass

sil @use_inguaranteed : $@convention(thin) (@in_guaranteed Klass) -> ()

sil @get_wrapper_struct : $@convention(thin) (@inout WrapperStruct) -> ()

// CHECK-LABEL: sil [ossa] @loop_unroll_1 :
// CHECK: bb0
// CHECK: br bb1
Expand Down Expand Up @@ -495,14 +511,6 @@ bb4:
return %8 : $()
}

class Klass {
var id: Int64
}

sil [_semantics "array.check_subscript"] @checkbounds : $@convention(method) (Int64, Bool, @guaranteed Array<Klass>) -> _DependenceToken

sil [_semantics "array.get_element"] @getElement : $@convention(method) (Int64, Bool, _DependenceToken, @guaranteed Array<Klass>) -> @owned Klass

// CHECK-LABEL: sil [ossa] @sumKlassId :
// CHECK: [[FUNC:%.*]] = function_ref @getElement :
// CHECK: apply [[FUNC]]
Expand Down Expand Up @@ -724,3 +732,64 @@ bb3:
return %tup : $()
}

// Ensure we don't trigger presence of address phis verification
sil [ossa] @unroll_with_addr_proj1 : $@convention(thin) (@in_guaranteed WrapperStruct) -> () {
bb0(%0 : $*WrapperStruct):
  %1 = integer_literal $Builtin.Int64, 1
  %2 = integer_literal $Builtin.Int64, 4
  %3 = integer_literal $Builtin.Int1, -1
  br bb1(%1 : $Builtin.Int64, %2 : $Builtin.Int64)

// Loop header: a small counted loop (induction variable %5, bound %2 = 4)
// that the unroller can fully unroll.
bb1(%5 : $Builtin.Int64, %6 : $Builtin.Int64):
  %7 = builtin "sadd_with_overflow_Int64"(%5 : $Builtin.Int64, %1 : $Builtin.Int64, %3 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
  %8 = tuple_extract %7 : $(Builtin.Int64, Builtin.Int1), 0
  %9 = builtin "smul_with_overflow_Int64"(%6 : $Builtin.Int64, %2 : $Builtin.Int64, %3 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
  %10 = tuple_extract %9 : $(Builtin.Int64, Builtin.Int1), 0
  %11 = builtin "cmp_slt_Int64"(%8 : $Builtin.Int64, %2 : $Builtin.Int64) : $Builtin.Int1
  // Address projection defined inside the loop whose only use (%16 in bb3)
  // is outside the loop. Without sinking this projection before cloning,
  // unrolling would have to merge the per-iteration copies of %12 with an
  // address-type phi, which the SIL verifier rejects.
  %12 = struct_element_addr %0 : $*WrapperStruct, #WrapperStruct.val
  cond_br %11, bb2, bb3

// Back edge.
bb2:
  br bb1(%8 : $Builtin.Int64, %10 : $Builtin.Int64)

// Loop exit: the out-of-loop user of the address projection %12.
bb3:
  %15 = function_ref @use_inguaranteed : $@convention(thin) (@in_guaranteed Klass) -> ()
  %16 = apply %15(%12) : $@convention(thin) (@in_guaranteed Klass) -> ()
  %17 = tuple ()
  return %17 : $()
}

// Ensure we don't trigger presence of address phis verification
sil [ossa] @unroll_with_addr_proj2 : $@convention(thin) (@in_guaranteed WrapperStruct) -> () {
bb0(%0 : $*WrapperStruct):
%1 = alloc_stack $WrapperStruct
%2 = integer_literal $Builtin.Int64, 1
%3 = integer_literal $Builtin.Int64, 4
%4 = integer_literal $Builtin.Int1, -1
%5 = load [copy] %0 : $*WrapperStruct
store %5 to [init] %1 : $*WrapperStruct
br bb1(%2 : $Builtin.Int64, %3 : $Builtin.Int64)

bb1(%8 : $Builtin.Int64, %9 : $Builtin.Int64):
%10 = builtin "sadd_with_overflow_Int64"(%8 : $Builtin.Int64, %2 : $Builtin.Int64, %4 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
%11 = tuple_extract %10 : $(Builtin.Int64, Builtin.Int1), 0
%12 = builtin "smul_with_overflow_Int64"(%9 : $Builtin.Int64, %3 : $Builtin.Int64, %4 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
%13 = tuple_extract %12 : $(Builtin.Int64, Builtin.Int1), 0
%14 = builtin "cmp_slt_Int64"(%11 : $Builtin.Int64, %3 : $Builtin.Int64) : $Builtin.Int1
%15 = function_ref @get_wrapper_struct : $@convention(thin) (@inout WrapperStruct) -> ()
%16 = apply %15(%1) : $@convention(thin) (@inout WrapperStruct) -> ()
%17 = struct_element_addr %1 : $*WrapperStruct, #WrapperStruct.val
cond_br %14, bb2, bb3

bb2:
br bb1(%11 : $Builtin.Int64, %13 : $Builtin.Int64)

bb3:
%20 = function_ref @use_inguaranteed : $@convention(thin) (@in_guaranteed Klass) -> ()
%21 = apply %20(%17) : $@convention(thin) (@in_guaranteed Klass) -> ()
destroy_addr %1 : $*WrapperStruct
dealloc_stack %1 : $*WrapperStruct
%24 = tuple ()
return %24 : $()
}