Skip to content

Commit f56b5d8

Browse files
author
Joe Shajrawi
committed
[LICM] Add support for Hoisting <Instruction, Instruction Set> Pairs
Support having the target of each hoist instruction as multiple sinks.
1 parent bc59eaa commit f56b5d8

File tree

2 files changed

+270
-23
lines changed

2 files changed

+270
-23
lines changed

lib/SILOptimizer/LoopTransforms/LICM.cpp

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,13 @@ using namespace swift;
4040

4141
/// Instructions which can be hoisted:
4242
/// loads, function calls without side effects and (some) exclusivity checks
43-
using HoistSet = llvm::SmallPtrSet<SILInstruction *, 8>;
43+
using InstSet = llvm::SmallPtrSet<SILInstruction *, 8>;
4444

4545
/// Instruction pairs which need to be hoisted together:
4646
/// e.g. If we hoist a begin access, we need to sink the matching end access
47-
using HoistPairSet =
48-
llvm::SmallVector<std::pair<SILInstruction *, SILInstruction *>, 8>;
47+
/// The target of each hoist instruction can be multiple sinks. As such,
48+
/// a pair consists of an instruction and the set of its matching sink instructions
49+
using HoistPairSet = llvm::SmallVector<std::pair<SILInstruction *, InstSet>, 8>;
4950

5051
/// A subset of instruction which may have side effects.
5152
/// Doesn't contain ones that have special handling (e.g. fix_lifetime)
@@ -149,6 +150,7 @@ static bool hoistInstruction(DominanceInfo *DT, SILInstruction *Inst,
149150
DEBUG(llvm::dbgs() << " loop variant operands\n");
150151
return false;
151152
}
153+
152154
auto mvBefore = Preheader->getTerminator();
153155
ArraySemanticsCall semCall(Inst);
154156
if (semCall.canHoist(mvBefore, DT)) {
@@ -160,7 +162,7 @@ static bool hoistInstruction(DominanceInfo *DT, SILInstruction *Inst,
160162
}
161163

162164
static bool hoistInstructions(SILLoop *Loop, DominanceInfo *DT,
163-
HoistSet &HoistUpSet) {
165+
InstSet &HoistUpSet) {
164166
DEBUG(llvm::dbgs() << " Hoisting instructions.\n");
165167
auto Preheader = Loop->getLoopPreheader();
166168
assert(Preheader && "Expected a preheader");
@@ -271,7 +273,7 @@ static bool sinkInstruction(DominanceInfo *DT,
271273

272274
static bool sinkInstructions(std::unique_ptr<LoopNestSummary> &LoopSummary,
273275
DominanceInfo *DT, SILLoopInfo *LI,
274-
HoistSet &SinkDownSet) {
276+
InstSet &SinkDownSet) {
275277
auto *Loop = LoopSummary->Loop;
276278
DEBUG(llvm::errs() << " Sink instructions attempt\n");
277279
SmallVector<SILBasicBlock *, 8> domBlocks;
@@ -300,15 +302,18 @@ hoistAndSinkInstructionPairs(std::unique_ptr<LoopNestSummary> &LoopSummary,
300302
assert(Preheader && "Expected a preheader");
301303

302304
bool Changed = false;
305+
303306
for (auto pair : Pairs) {
304307
auto *Up = pair.first;
305-
auto *Down = pair.second;
308+
auto &SinkDownSet = pair.second;
306309
if (!hoistInstruction(DT, Up, Loop, Preheader)) {
307310
continue;
308311
}
309312
DEBUG(llvm::dbgs() << "Hoisted " << *Up);
310-
if (!sinkInstruction(DT, LoopSummary, Down, LI)) {
311-
llvm_unreachable("LICM: Could not perform must-sink instruction");
313+
for (auto *instSink : SinkDownSet) {
314+
if (!sinkInstruction(DT, LoopSummary, instSink, LI)) {
315+
llvm_unreachable("LICM: Could not perform must-sink instruction");
316+
}
312317
}
313318
DEBUG(llvm::errs() << " Successfully hosited and sank pair\n");
314319
Changed = true;
@@ -333,10 +338,10 @@ class LoopTreeOptimization {
333338
bool RunsOnHighLevelSIL;
334339

335340
/// Instructions that we may be able to hoist up
336-
HoistSet HoistUp;
341+
InstSet HoistUp;
337342

338343
/// Instructions that we may be able to sink down
339-
HoistSet SinkDown;
344+
InstSet SinkDown;
340345

341346
/// Instruction pairs that we may be able to hoist and sink
342347
HoistPairSet HoistingPairs;
@@ -462,7 +467,26 @@ static bool canHoistUpDefault(SILInstruction *inst, SILLoop *Loop,
462467
return semCall.canHoist(Preheader->getTerminator(), DT);
463468
}
464469

465-
static void analyzeBeginAccess(BeginAccessInst *&BI,
470+
// Check whether all the end accesses of the given begin access permit hoisting.
// Returns true when every end_access user of BI is located in a block that
// belongs to Loop; returns false as soon as one end_access lies elsewhere.
// Users of BI that are not end_access instructions are ignored.
471+
// Two placements of an end access instruction are considered:
472+
// 1) Inside the same loop (sink to loop exits) - accepted here.
473+
// Potential TODO: At loop exit block - currently rejected, since such a block
// is not part of the loop's block set.
474+
static bool handledEndAccesses(BeginAccessInst *BI, SILLoop *Loop) {
475+
for (auto Use : BI->getUses()) {
476+
auto *User = Use->getUser();
477+
// Only end_access users matter for this check; skip everything else.
if (!dyn_cast<EndAccessInst>(User)) {
478+
continue;
479+
}
480+
auto *BB = User->getParent();
481+
// An end_access inside one of the loop's blocks is a supported placement.
if (Loop->getBlocksSet().count(BB) != 0) {
482+
continue;
483+
}
484+
// Found an end_access outside the loop: this begin access is not handled.
return false;
485+
}
486+
return true;
487+
}
488+
489+
static void analyzeBeginAccess(BeginAccessInst *BI,
466490
SmallVector<BeginAccessInst *, 8> &BeginAccesses,
467491
SmallVector<EndAccessInst *, 8> &EndAccesses,
468492
HoistPairSet &HoistingPairs) {
@@ -476,23 +500,26 @@ static void analyzeBeginAccess(BeginAccessInst *&BI,
476500
return;
477501
}
478502

479-
// find matching end access:
503+
// find matching end accesses:
504+
InstSet matchingEnds;
480505
auto matchingEndPred = [&](EndAccessInst *EI) {
481506
return EI->getBeginAccess() == BI;
482507
};
483-
auto matchingEnd =
484-
std::find_if(EndAccesses.begin(), EndAccesses.end(), matchingEndPred);
485-
if (matchingEnd == EndAccesses.end()) {
486-
// no matching end within the loop
487-
return;
508+
for (auto matchingEnd = std::find_if(EndAccesses.begin(), EndAccesses.end(),
509+
matchingEndPred);
510+
matchingEnd != EndAccesses.end();
511+
matchingEnd =
512+
std::find_if(matchingEnd, EndAccesses.end(), matchingEndPred)) {
513+
auto *EI = *matchingEnd;
514+
matchingEnds.insert(EI);
515+
++matchingEnd;
488516
}
489-
auto *EI = *matchingEnd;
490-
++matchingEnd;
491-
if (std::find_if(matchingEnd, EndAccesses.end(), matchingEndPred) !=
492-
EndAccesses.end()) {
493-
// We expect a single matching end access in the loop
517+
if (matchingEnds.empty()) {
518+
// no matching end within the loop
494519
return;
495520
}
521+
DEBUG(llvm::dbgs() << "Found " << matchingEnds.size() << " End accesses"
522+
<< "\n");
496523

497524
auto BIAccessedStorageNonNested = findAccessedStorageNonNested(BI);
498525
auto safeBeginPred = [&](BeginAccessInst *OtherBI) {
@@ -504,7 +531,7 @@ static void analyzeBeginAccess(BeginAccessInst *&BI,
504531
};
505532

506533
if (std::all_of(BeginAccesses.begin(), BeginAccesses.end(), safeBeginPred)) {
507-
HoistingPairs.push_back(std::make_pair(BI, EI));
534+
HoistingPairs.emplace_back(std::make_pair(BI, std::move(matchingEnds)));
508535
}
509536
}
510537

@@ -618,6 +645,12 @@ void LoopTreeOptimization::analyzeCurrentLoop(
618645
}
619646
}
620647
for (auto *BI : BeginAccesses) {
648+
if (!handledEndAccesses(BI, Loop)) {
649+
DEBUG(llvm::dbgs() << "Skipping: " << *BI);
650+
DEBUG(llvm::dbgs() << "Some end accesses can't be handled"
651+
<< "\n");
652+
continue;
653+
}
621654
analyzeBeginAccess(BI, BeginAccesses, EndAccesses, HoistingPairs);
622655
}
623656
}

test/SILOptimizer/licm_multiend.sil

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
// RUN: %target-sil-opt -assume-parsing-unqualified-ownership-sil -enable-sil-verify-all %s -licm | %FileCheck %s
2+
// REQUIRES: CPU=x86_64
3+
// REQUIRES: OS=macosx
4+
5+
sil_stage canonical
6+
7+
import Builtin
8+
import Swift
9+
import SwiftShims
10+
11+
var x: Int
12+
13+
let reversedArray: ReversedCollection<[Int]>
14+
15+
// x
16+
sil_global hidden @$S3tmp1xSivp : $Int
17+
18+
// reversedArray
19+
sil_global hidden [let] @$S3tmp13reversedArrays18ReversedCollectionVySaySiGGvp : $ReversedCollection<Array<Int>>
20+
21+
// _swiftEmptyArrayStorage
22+
sil_global @_swiftEmptyArrayStorage : $_SwiftEmptyArrayStorage
23+
24+
25+
// CHECK-LABEL: sil hidden @multi_end_licm : $@convention(thin) () -> () {
26+
// CHECK: bb2:
27+
// CHECK: [[GLOBALVAR:%.*]] = global_addr @$S3tmp1xSivp : $*Int
28+
// CHECK: [[BEGINA:%.*]] = begin_access [modify] [dynamic] [no_nested_conflict] [[GLOBALVAR]] : $*Int
29+
// CHECK-NEXT: br [[LOOPH:bb[0-9]+]]({{.*}} : $Builtin.Int64)
30+
// CHECK: [[LOOPH]]({{.*}} : $Builtin.Int64)
31+
// CHECK: cond_br {{.*}}, [[LOOPCOND1:bb[0-9]+]], [[LOOPCOND2:bb[0-9]+]]
32+
// CHECK: [[LOOPCOND1]]:
33+
// CHECK-NEXT: store
34+
// CHECK-NEXT: cond_br {{.*}}, [[LOOPEXIT1:bb[0-9]+]], [[LOOPCONT1:bb[0-9]+]]
35+
// CHECK: [[LOOPEXIT1]]:
36+
// CHECK-NEXT: end_access [[BEGINA]] : $*Int
37+
// CHECK-NEXT: br [[LOOPAFTEREXIT:bb[0-9]+]]
38+
// CHECK: [[LOOPCOND2]]:
39+
// CHECK-NEXT: struct $Int
40+
// CHECK-NEXT: store
41+
// CHECK-NEXT: cond_br {{.*}}, [[LOOPEXIT2:bb[0-9]+]], [[LOOPCONT1]]
42+
// CHECK: [[LOOPEXIT2]]:
43+
// CHECK-NEXT: end_access [[BEGINA]] : $*Int
44+
// CHECK-NEXT: br [[LOOPAFTEREXIT]]
45+
// CHECK: [[LOOPCONT1]]:
46+
// CHECK-NEXT: br [[LOOPH]]
47+
// CHECK: [[LOOPAFTEREXIT]]:
48+
// CHECK-NEXT: br [[FUNCRET:bb[0-9]+]]
49+
// CHECK: [[FUNCRET]]:
50+
// CHECK-NEXT: tuple
51+
// CHECK-NEXT: return
52+
sil hidden @multi_end_licm : $@convention(thin) () -> () {
53+
bb0:
54+
%0 = global_addr @$S3tmp13reversedArrays18ReversedCollectionVySaySiGGvp : $*ReversedCollection<Array<Int>>
55+
%1 = struct_element_addr %0 : $*ReversedCollection<Array<Int>>, #ReversedCollection._base
56+
%2 = struct_element_addr %1 : $*Array<Int>, #Array._buffer
57+
%3 = struct_element_addr %2 : $*_ArrayBuffer<Int>, #_ArrayBuffer._storage
58+
%4 = struct_element_addr %3 : $*_BridgeStorage<_ContiguousArrayStorageBase, _NSArrayCore>, #_BridgeStorage.rawValue
59+
%5 = load %4 : $*Builtin.BridgeObject
60+
%6 = unchecked_ref_cast %5 : $Builtin.BridgeObject to $_ContiguousArrayStorageBase
61+
%7 = ref_element_addr %6 : $_ContiguousArrayStorageBase, #_ContiguousArrayStorageBase.countAndCapacity
62+
%8 = struct_element_addr %7 : $*_ArrayBody, #_ArrayBody._storage
63+
%9 = struct_element_addr %8 : $*_SwiftArrayBodyStorage, #_SwiftArrayBodyStorage.count
64+
%10 = struct_element_addr %9 : $*Int, #Int._value
65+
%11 = load %10 : $*Builtin.Int64
66+
%12 = builtin "assumeNonNegative_Int64"(%11 : $Builtin.Int64) : $Builtin.Int64
67+
%13 = integer_literal $Builtin.Int64, 0
68+
%14 = integer_literal $Builtin.Int1, 0
69+
%15 = builtin "cmp_eq_Int64"(%12 : $Builtin.Int64, %13 : $Builtin.Int64) : $Builtin.Int1
70+
%16 = builtin "int_expect_Int1"(%15 : $Builtin.Int1, %14 : $Builtin.Int1) : $Builtin.Int1
71+
cond_br %16, bb1, bb2
72+
73+
bb1:
74+
br bbRet
75+
76+
bb2:
77+
%19 = global_addr @$S3tmp1xSivp : $*Int
78+
%20 = integer_literal $Builtin.Int64, 1
79+
%21 = integer_literal $Builtin.Int1, -1
80+
%23 = ref_tail_addr %6 : $_ContiguousArrayStorageBase, $Int
81+
br bb4(%12 : $Builtin.Int64)
82+
83+
bb4(%27 : $Builtin.Int64):
84+
%28 = builtin "ssub_with_overflow_Int64"(%27 : $Builtin.Int64, %20 : $Builtin.Int64, %21 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
85+
%29 = tuple_extract %28 : $(Builtin.Int64, Builtin.Int1), 0
86+
%30 = tuple_extract %28 : $(Builtin.Int64, Builtin.Int1), 1
87+
cond_fail %30 : $Builtin.Int1
88+
%32 = builtin "cmp_slt_Int64"(%29 : $Builtin.Int64, %13 : $Builtin.Int64) : $Builtin.Int1
89+
%33 = load %10 : $*Builtin.Int64
90+
%34 = builtin "assumeNonNegative_Int64"(%33 : $Builtin.Int64) : $Builtin.Int64
91+
%35 = builtin "cmp_slt_Int64"(%29 : $Builtin.Int64, %34 : $Builtin.Int64) : $Builtin.Int1
92+
%36 = builtin "xor_Int1"(%35 : $Builtin.Int1, %21 : $Builtin.Int1) : $Builtin.Int1
93+
%37 = builtin "or_Int1"(%32 : $Builtin.Int1, %36 : $Builtin.Int1) : $Builtin.Int1
94+
cond_fail %37 : $Builtin.Int1
95+
%39 = builtin "truncOrBitCast_Int64_Word"(%29 : $Builtin.Int64) : $Builtin.Word
96+
%40 = index_addr %23 : $*Int, %39 : $Builtin.Word
97+
%41 = struct_element_addr %40 : $*Int, #Int._value
98+
%42 = load %41 : $*Builtin.Int64
99+
%43 = struct $Int (%42 : $Builtin.Int64)
100+
debug_value %43 : $Int, let, name "item"
101+
%global = begin_access [modify] [dynamic] [no_nested_conflict] %19 : $*Int
102+
%46 = builtin "cmp_eq_Int64"(%29 : $Builtin.Int64, %13 : $Builtin.Int64) : $Builtin.Int1
103+
%47 = builtin "int_expect_Int1"(%46 : $Builtin.Int1, %14 : $Builtin.Int1) : $Builtin.Int1
104+
cond_br %47, bbend1, bbend2
105+
106+
bbend1:
107+
store %43 to %global : $*Int
108+
end_access %global : $*Int
109+
cond_br %47, bb6, bb5
110+
111+
bbend2:
112+
%otherInt = struct $Int (%27 : $Builtin.Int64)
113+
store %otherInt to %global : $*Int
114+
end_access %global : $*Int
115+
cond_br %47, bb6, bb5
116+
117+
bb5:
118+
br bb4(%29 : $Builtin.Int64)
119+
120+
bb6:
121+
br bbRet
122+
123+
bbRet:
124+
%25 = tuple ()
125+
return %25 : $()
126+
} // end sil function 'multi_end_licm'
127+
128+
// CHECK-LABEL: sil hidden @multi_end_licm_loop_exit : $@convention(thin) () -> () {
129+
// CHECK: br [[LOOPH:bb[0-9]+]]({{.*}} : $Builtin.Int64)
130+
// CHECK: [[LOOPH]]({{.*}} : $Builtin.Int64)
131+
// CHECK: begin_access [modify] [dynamic] [no_nested_conflict]
132+
// CHECK: cond_br {{.*}}, [[LOOPCOND1:bb[0-9]+]], [[LOOPCOND2:bb[0-9]+]]
133+
// CHECK: [[LOOPCOND1]]
134+
// CHECK-NEXT: store
135+
// CHECK-NEXT: end_access
136+
// CHECK: return
137+
sil hidden @multi_end_licm_loop_exit : $@convention(thin) () -> () {
138+
bb0:
139+
%0 = global_addr @$S3tmp13reversedArrays18ReversedCollectionVySaySiGGvp : $*ReversedCollection<Array<Int>>
140+
%1 = struct_element_addr %0 : $*ReversedCollection<Array<Int>>, #ReversedCollection._base
141+
%2 = struct_element_addr %1 : $*Array<Int>, #Array._buffer
142+
%3 = struct_element_addr %2 : $*_ArrayBuffer<Int>, #_ArrayBuffer._storage
143+
%4 = struct_element_addr %3 : $*_BridgeStorage<_ContiguousArrayStorageBase, _NSArrayCore>, #_BridgeStorage.rawValue
144+
%5 = load %4 : $*Builtin.BridgeObject
145+
%6 = unchecked_ref_cast %5 : $Builtin.BridgeObject to $_ContiguousArrayStorageBase
146+
%7 = ref_element_addr %6 : $_ContiguousArrayStorageBase, #_ContiguousArrayStorageBase.countAndCapacity
147+
%8 = struct_element_addr %7 : $*_ArrayBody, #_ArrayBody._storage
148+
%9 = struct_element_addr %8 : $*_SwiftArrayBodyStorage, #_SwiftArrayBodyStorage.count
149+
%10 = struct_element_addr %9 : $*Int, #Int._value
150+
%11 = load %10 : $*Builtin.Int64
151+
%12 = builtin "assumeNonNegative_Int64"(%11 : $Builtin.Int64) : $Builtin.Int64
152+
%13 = integer_literal $Builtin.Int64, 0
153+
%14 = integer_literal $Builtin.Int1, 0
154+
%15 = builtin "cmp_eq_Int64"(%12 : $Builtin.Int64, %13 : $Builtin.Int64) : $Builtin.Int1
155+
%16 = builtin "int_expect_Int1"(%15 : $Builtin.Int1, %14 : $Builtin.Int1) : $Builtin.Int1
156+
cond_br %16, bb1, bb2
157+
158+
bb1:
159+
br bbRet
160+
161+
bb2:
162+
%19 = global_addr @$S3tmp1xSivp : $*Int
163+
%20 = integer_literal $Builtin.Int64, 1
164+
%21 = integer_literal $Builtin.Int1, -1
165+
%23 = ref_tail_addr %6 : $_ContiguousArrayStorageBase, $Int
166+
br bb4(%12 : $Builtin.Int64)
167+
168+
bb4(%27 : $Builtin.Int64):
169+
%28 = builtin "ssub_with_overflow_Int64"(%27 : $Builtin.Int64, %20 : $Builtin.Int64, %21 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1)
170+
%29 = tuple_extract %28 : $(Builtin.Int64, Builtin.Int1), 0
171+
%30 = tuple_extract %28 : $(Builtin.Int64, Builtin.Int1), 1
172+
cond_fail %30 : $Builtin.Int1
173+
%32 = builtin "cmp_slt_Int64"(%29 : $Builtin.Int64, %13 : $Builtin.Int64) : $Builtin.Int1
174+
%33 = load %10 : $*Builtin.Int64
175+
%34 = builtin "assumeNonNegative_Int64"(%33 : $Builtin.Int64) : $Builtin.Int64
176+
%35 = builtin "cmp_slt_Int64"(%29 : $Builtin.Int64, %34 : $Builtin.Int64) : $Builtin.Int1
177+
%36 = builtin "xor_Int1"(%35 : $Builtin.Int1, %21 : $Builtin.Int1) : $Builtin.Int1
178+
%37 = builtin "or_Int1"(%32 : $Builtin.Int1, %36 : $Builtin.Int1) : $Builtin.Int1
179+
cond_fail %37 : $Builtin.Int1
180+
%39 = builtin "truncOrBitCast_Int64_Word"(%29 : $Builtin.Int64) : $Builtin.Word
181+
%40 = index_addr %23 : $*Int, %39 : $Builtin.Word
182+
%41 = struct_element_addr %40 : $*Int, #Int._value
183+
%42 = load %41 : $*Builtin.Int64
184+
%43 = struct $Int (%42 : $Builtin.Int64)
185+
debug_value %43 : $Int, let, name "item"
186+
%global = begin_access [modify] [dynamic] [no_nested_conflict] %19 : $*Int
187+
%46 = builtin "cmp_eq_Int64"(%29 : $Builtin.Int64, %13 : $Builtin.Int64) : $Builtin.Int1
188+
%47 = builtin "int_expect_Int1"(%46 : $Builtin.Int1, %14 : $Builtin.Int1) : $Builtin.Int1
189+
cond_br %47, bbend1, bbend2
190+
191+
bbend1:
192+
store %43 to %global : $*Int
193+
end_access %global : $*Int
194+
cond_br %47, bb6, bb5
195+
196+
bbend2:
197+
%otherInt = struct $Int (%27 : $Builtin.Int64)
198+
store %otherInt to %global : $*Int
199+
cond_br %47, bbOut, bb5
200+
201+
bbOut:
202+
end_access %global : $*Int
203+
br bb6
204+
205+
bb5:
206+
br bb4(%29 : $Builtin.Int64)
207+
208+
bb6:
209+
br bbRet
210+
211+
bbRet:
212+
%25 = tuple ()
213+
return %25 : $()
214+
} // end sil function 'multi_end_licm_loop_exit'

0 commit comments

Comments
 (0)