Skip to content

Commit a190855

Browse files
authored
Merge pull request #61051 from meg-gupta/loopunrolladdressphi
Avoid generating address phis in LoopUnroll
2 parents fd8d7af + 035f062 commit a190855

File tree

4 files changed

+116
-27
lines changed

4 files changed

+116
-27
lines changed

lib/SIL/Utils/LoopInfo.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "swift/SIL/Dominance.h"
1616
#include "swift/SIL/SILFunction.h"
1717
#include "swift/SIL/CFG.h"
18+
#include "swift/SILOptimizer/Utils/BasicBlockOptUtils.h"
1819
#include "llvm/Analysis/LoopInfoImpl.h"
1920
#include "llvm/Support/Debug.h"
2021

@@ -36,6 +37,17 @@ SILLoopInfo::SILLoopInfo(SILFunction *F, DominanceInfo *DT) : Dominance(DT) {
3637
}
3738

3839
bool SILLoop::canDuplicate(SILInstruction *I) const {
40+
SinkAddressProjections sinkProj;
41+
for (auto res : I->getResults()) {
42+
if (!res->getType().isAddress()) {
43+
continue;
44+
}
45+
auto canSink = sinkProj.analyzeAddressProjections(I);
46+
if (!canSink) {
47+
return false;
48+
}
49+
}
50+
3951
// The deallocation of a stack allocation must be in the loop, otherwise the
4052
// deallocation will be fed by a phi node of two allocations.
4153
if (I->isAllocatingStack()) {

lib/SIL/Verifier/SILVerifier.cpp

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -985,24 +985,6 @@ class SILVerifier : public SILVerifierBase<SILVerifier> {
985985
return InstNumbers[a] < InstNumbers[b];
986986
}
987987

988-
// FIXME: For sanity, address-type phis should be prohibited at all SIL
989-
// stages. However, the optimizer currently breaks the invariant in three
990-
// places:
991-
// 1. Normal Simplify CFG during conditional branch simplification
992-
// (sneaky jump threading).
993-
// 2. Simplify CFG via Jump Threading.
994-
// 3. Loop Rotation.
995-
//
996-
// BasicBlockCloner::canCloneInstruction and sinkAddressProjections is
997-
// designed to avoid this issue, we just need to make sure all passes use it
998-
// correctly.
999-
//
1000-
// Minimally, we must prevent address-type phis as long as access markers are
1001-
// preserved. A goal is to preserve access markers in OSSA.
1002-
bool prohibitAddressPhis() {
1003-
return F.hasOwnership();
1004-
}
1005-
1006988
void visitSILPhiArgument(SILPhiArgument *arg) {
1007989
// Verify that the `isPhiArgument` property is sound:
1008990
// - Phi arguments come from branches.
@@ -1026,7 +1008,7 @@ class SILVerifier : public SILVerifierBase<SILVerifier> {
10261008
"All phi argument inputs must be from branches.");
10271009
}
10281010
}
1029-
if (arg->isPhi() && prohibitAddressPhis()) {
1011+
if (arg->isPhi()) {
10301012
// As a property of well-formed SIL, we disallow address-type
10311013
// phis. Supporting them would prevent reliably reasoning about the
10321014
// underlying storage of memory access. This reasoning is important for

lib/SILOptimizer/LoopTransforms/LoopUnroll.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "swift/SILOptimizer/Analysis/LoopAnalysis.h"
2020
#include "swift/SILOptimizer/PassManager/Passes.h"
2121
#include "swift/SILOptimizer/PassManager/Transforms.h"
22+
#include "swift/SILOptimizer/Utils/BasicBlockOptUtils.h"
2223
#include "swift/SILOptimizer/Utils/PerformanceInlinerUtils.h"
2324
#include "swift/SILOptimizer/Utils/SILInliner.h"
2425
#include "swift/SILOptimizer/Utils/SILSSAUpdater.h"
@@ -48,6 +49,8 @@ class LoopCloner : public SILCloner<LoopCloner> {
4849
/// Clone the basic blocks in the loop.
4950
void cloneLoop();
5051

52+
void sinkAddressProjections();
53+
5154
// Update SSA helper.
5255
void collectLoopLiveOutValues(
5356
DenseMap<SILValue, SmallVector<SILValue, 8>> &LoopLiveOutValues);
@@ -69,10 +72,33 @@ class LoopCloner : public SILCloner<LoopCloner> {
6972

7073
} // end anonymous namespace
7174

75+
void LoopCloner::sinkAddressProjections() {
76+
SinkAddressProjections sinkProj;
77+
for (auto *bb : Loop->getBlocks()) {
78+
for (auto &inst : *bb) {
79+
for (auto res : inst.getResults()) {
80+
if (!res->getType().isAddress()) {
81+
continue;
82+
}
83+
for (auto use : res->getUses()) {
84+
auto *user = use->getUser();
85+
if (Loop->contains(user)) {
86+
continue;
87+
}
88+
bool canSink = sinkProj.analyzeAddressProjections(&inst);
89+
assert(canSink);
90+
sinkProj.cloneProjections();
91+
}
92+
}
93+
}
94+
}
95+
}
96+
7297
void LoopCloner::cloneLoop() {
7398
SmallVector<SILBasicBlock *, 16> ExitBlocks;
7499
Loop->getExitBlocks(ExitBlocks);
75100

101+
sinkAddressProjections();
76102
// Clone the entire loop.
77103
cloneReachableBlocks(Loop->getHeader(), ExitBlocks,
78104
/*insertAfter*/Loop->getLoopLatch());

test/SILOptimizer/loop_unroll_ossa.sil

Lines changed: 77 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,22 @@ struct UInt64 {
2121
var value: Builtin.Int64
2222
}
2323

24+
class Klass {
25+
var id: Int64
26+
}
27+
28+
struct WrapperStruct {
29+
var val: Klass
30+
}
31+
32+
sil [_semantics "array.check_subscript"] @checkbounds : $@convention(method) (Int64, Bool, @guaranteed Array<Klass>) -> _DependenceToken
33+
34+
sil [_semantics "array.get_element"] @getElement : $@convention(method) (Int64, Bool, _DependenceToken, @guaranteed Array<Klass>) -> @owned Klass
35+
36+
sil @use_inguaranteed : $@convention(thin) (@in_guaranteed Klass) -> ()
37+
38+
sil @get_wrapper_struct : $@convention(thin) (@inout WrapperStruct) -> ()
39+
2440
// CHECK-LABEL: sil [ossa] @loop_unroll_1 :
2541
// CHECK: bb0
2642
// CHECK: br bb1
@@ -495,14 +511,6 @@ bb4:
495511
return %8 : $()
496512
}
497513

498-
class Klass {
499-
var id: Int64
500-
}
501-
502-
sil [_semantics "array.check_subscript"] @checkbounds : $@convention(method) (Int64, Bool, @guaranteed Array<Klass>) -> _DependenceToken
503-
504-
sil [_semantics "array.get_element"] @getElement : $@convention(method) (Int64, Bool, _DependenceToken, @guaranteed Array<Klass>) -> @owned Klass
505-
506514
// CHECK-LABEL: sil [ossa] @sumKlassId :
507515
// CHECK: [[FUNC:%.*]] = function_ref @getElement :
508516
// CHECK: apply [[FUNC]]
@@ -724,3 +732,64 @@ bb3:
724732
return %tup : $()
725733
}
726734

735+
// Ensure we don't trigger presence of address phis verification
736+
sil [ossa] @unroll_with_addr_proj1 : $@convention(thin) (@in_guaranteed WrapperStruct) -> () {
737+
bb0(%0 : $*WrapperStruct):
738+
%1 = integer_literal $Builtin.Int64, 1
739+
%2 = integer_literal $Builtin.Int64, 4
740+
%3 = integer_literal $Builtin.Int1, -1
741+
br bb1(%1 : $Builtin.Int64, %2 : $Builtin.Int64)
742+
743+
bb1(%5 : $Builtin.Int64, %6 : $Builtin.Int64):
744+
%7 = builtin "sadd_with_overflow_Int64"(%5 : $Builtin.Int64, %1 : $Builtin.Int64, %3 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
745+
%8 = tuple_extract %7 : $(Builtin.Int64, Builtin.Int1), 0
746+
%9 = builtin "smul_with_overflow_Int64"(%6 : $Builtin.Int64, %2 : $Builtin.Int64, %3 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
747+
%10 = tuple_extract %9 : $(Builtin.Int64, Builtin.Int1), 0
748+
%11 = builtin "cmp_slt_Int64"(%8 : $Builtin.Int64, %2 : $Builtin.Int64) : $Builtin.Int1
749+
%12 = struct_element_addr %0 : $*WrapperStruct, #WrapperStruct.val
750+
cond_br %11, bb2, bb3
751+
752+
bb2:
753+
br bb1(%8 : $Builtin.Int64, %10 : $Builtin.Int64)
754+
755+
bb3:
756+
%15 = function_ref @use_inguaranteed : $@convention(thin) (@in_guaranteed Klass) -> ()
757+
%16 = apply %15(%12) : $@convention(thin) (@in_guaranteed Klass) -> ()
758+
%17 = tuple ()
759+
return %17 : $()
760+
}
761+
762+
// Ensure we don't trigger presence of address phis verification
763+
sil [ossa] @unroll_with_addr_proj2 : $@convention(thin) (@in_guaranteed WrapperStruct) -> () {
764+
bb0(%0 : $*WrapperStruct):
765+
%1 = alloc_stack $WrapperStruct
766+
%2 = integer_literal $Builtin.Int64, 1
767+
%3 = integer_literal $Builtin.Int64, 4
768+
%4 = integer_literal $Builtin.Int1, -1
769+
%5 = load [copy] %0 : $*WrapperStruct
770+
store %5 to [init] %1 : $*WrapperStruct
771+
br bb1(%2 : $Builtin.Int64, %3 : $Builtin.Int64)
772+
773+
bb1(%8 : $Builtin.Int64, %9 : $Builtin.Int64):
774+
%10 = builtin "sadd_with_overflow_Int64"(%8 : $Builtin.Int64, %2 : $Builtin.Int64, %4 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
775+
%11 = tuple_extract %10 : $(Builtin.Int64, Builtin.Int1), 0
776+
%12 = builtin "smul_with_overflow_Int64"(%9 : $Builtin.Int64, %3 : $Builtin.Int64, %4 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
777+
%13 = tuple_extract %12 : $(Builtin.Int64, Builtin.Int1), 0
778+
%14 = builtin "cmp_slt_Int64"(%11 : $Builtin.Int64, %3 : $Builtin.Int64) : $Builtin.Int1
779+
%15 = function_ref @get_wrapper_struct : $@convention(thin) (@inout WrapperStruct) -> ()
780+
%16 = apply %15(%1) : $@convention(thin) (@inout WrapperStruct) -> ()
781+
%17 = struct_element_addr %1 : $*WrapperStruct, #WrapperStruct.val
782+
cond_br %14, bb2, bb3
783+
784+
bb2:
785+
br bb1(%11 : $Builtin.Int64, %13 : $Builtin.Int64)
786+
787+
bb3:
788+
%20 = function_ref @use_inguaranteed : $@convention(thin) (@in_guaranteed Klass) -> ()
789+
%21 = apply %20(%17) : $@convention(thin) (@in_guaranteed Klass) -> ()
790+
destroy_addr %1 : $*WrapperStruct
791+
dealloc_stack %1 : $*WrapperStruct
792+
%24 = tuple ()
793+
return %24 : $()
794+
}
795+

0 commit comments

Comments
 (0)