Skip to content

Commit 56928ba

Browse files
authored
Merge pull request #34593 from eeckstein/optimize_hte
[concurrency] SILOptimizer: optimize hop_to_executor instructions.
2 parents 6f60a17 + a47ebab commit 56928ba

File tree

6 files changed

+534
-0
lines changed

6 files changed

+534
-0
lines changed

include/swift/SIL/ApplySite.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,10 @@ class ApplySite {
216216
return SILFunctionConventions(getSubstCalleeType(), getModule());
217217
}
218218

219+
bool isAsync() const {
220+
return getOrigCalleeType()->isAsync();
221+
}
222+
219223
/// Returns true if the callee function is annotated with
220224
/// @_semantics("programtermination_point")
221225
bool isCalleeKnownProgramTerminationPoint() const {

include/swift/SILOptimizer/PassManager/Passes.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,8 @@ PASS(HighLevelLICM, "high-level-licm",
206206
"Loop Invariant Code Motion in High-Level SIL")
207207
PASS(IVInfoPrinter, "iv-info-printer",
208208
"Print Induction Variable Information for Testing")
209+
PASS(OptimizeHopToExecutor, "optimize-hop-to-executor",
210+
"Optimize hop_to_executor instructions for actor isolated code")
209211
PASS(InstCount, "inst-count",
210212
"Record SIL Instruction, Block, and Function Counts as LLVM Statistics")
211213
PASS(JumpThreadSimplifyCFG, "jumpthread-simplify-cfg",

lib/SILOptimizer/Mandatory/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ target_sources(swiftSILOptimizer PRIVATE
1515
IRGenPrepare.cpp
1616
MandatoryInlining.cpp
1717
NestedSemanticFunctionCheck.cpp
18+
OptimizeHopToExecutor.cpp
1819
PredictableMemOpt.cpp
1920
PMOMemoryUseCollector.cpp
2021
RawSILInstLowering.cpp
Lines changed: 367 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,367 @@
1+
//===------- OptimizeHopToExecutor.cpp - optimize hop_to_executor ---------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#define DEBUG_TYPE "insert-hop-to-executor"
14+
#include "swift/SIL/SILBuilder.h"
15+
#include "swift/SIL/SILFunction.h"
16+
#include "swift/SIL/ApplySite.h"
17+
#include "swift/SIL/MemoryLifetime.h"
18+
#include "swift/SILOptimizer/PassManager/Transforms.h"
19+
#include "swift/SIL/MemAccessUtils.h"
20+
21+
using namespace swift;
22+
23+
namespace {
24+
25+
/// Optimizes hop_to_executor instructions.
26+
///
27+
/// * Redundant hop_to_executor elimination: if a hop_to_executor is dominated
28+
/// by another hop_to_executor with the same operand, it is eliminated:
29+
/// \code
30+
/// hop_to_executor %a
31+
/// ... // no suspension points
32+
/// hop_to_executor %a // can be eliminated
33+
/// \endcode
34+
///
35+
/// * Dead hop_to_executor elimination: if a hop_to_executor is not followed by
36+
/// any code which requires to run on its actor's executor, it is eliminated:
37+
/// \code
38+
/// hop_to_executor %a
39+
/// ... // no instruction which require to run on %a
40+
/// return
41+
/// \endcode
42+
class OptimizeHopToExecutor {
43+
44+
private:
45+
46+
typedef llvm::DenseMap<SILValue, int> Actors;
47+
48+
/// Basic-block specific information used for dataflow analysis.
49+
struct BlockState {
50+
enum {
51+
NotSet = -2,
52+
53+
// Used in the forward dataflow in removeRedundantHopToExecutors.
54+
Unknown = -1,
55+
56+
// Used in the backward dataflow in removeDeadHopToExecutors.
57+
ExecutorNeeded = Unknown,
58+
NoExecutorNeeded = 0,
59+
};
60+
61+
static_assert(ExecutorNeeded == Unknown,
62+
"needed for merge() to correctly merge ExecutorNeeded and NoExecutorNeeded");
63+
64+
/// The backlink to the SILBasicBlock.
65+
SILBasicBlock *block = nullptr;
66+
67+
/// The value at the entry (i.e. the first instruction) of the block.
68+
int entry = NotSet;
69+
70+
/// The value of the block itself. It's NotSet if the block has no
71+
/// significant instructions for the dataflow.
72+
int intra = NotSet;
73+
74+
/// The value at the exit (i.e. after the terminator) of the block.
75+
int exit = NotSet;
76+
77+
/// Merge two values at a control-flow merge point.
78+
static int merge(int lhs, int rhs) {
79+
if (lhs == NotSet || lhs == rhs)
80+
return rhs;
81+
if (rhs == NotSet)
82+
return lhs;
83+
return Unknown;
84+
}
85+
};
86+
87+
SILFunction *function;
88+
89+
/// All block states.
90+
std::vector<BlockState> blockStates;
91+
92+
llvm::DenseMap<SILBasicBlock *, BlockState *> block2State;
93+
94+
void collectActors(Actors &actors);
95+
96+
void allocateBlockStates();
97+
98+
void solveDataflowForward();
99+
void solveDataflowBackward();
100+
101+
bool removeRedundantHopToExecutors(const Actors &actors);
102+
103+
bool removeDeadHopToExecutors();
104+
105+
static void updateNeedExecutor(int &needExecutor, SILInstruction *inst);
106+
static bool needsExecutor(SILInstruction *inst);
107+
static bool isGlobalMemory(SILValue addr);
108+
109+
public:
110+
111+
OptimizeHopToExecutor(SILFunction *function) : function(function) { }
112+
113+
/// The entry point to the transformation.
114+
bool run();
115+
116+
void dump();
117+
};
118+
119+
/// Search for hop_to_executor instructions and add their operands to \p actors.
120+
void OptimizeHopToExecutor::collectActors(Actors &actors) {
121+
for (SILBasicBlock &block : *function) {
122+
for (SILInstruction &inst : block) {
123+
if (auto *hop = dyn_cast<HopToExecutorInst>(&inst)) {
124+
int idx = actors.size();
125+
actors[hop->getOperand()] = idx;
126+
}
127+
}
128+
}
129+
}
130+
131+
/// Initialize blockStates and block2State.
132+
void OptimizeHopToExecutor::allocateBlockStates() {
133+
// Resizing is mandatory! Just adding states with push_back would potentially
134+
// invalidate previous pointers to states, which are stored in block2State.
135+
blockStates.resize(function->size());
136+
137+
for (auto blockAndIdx : llvm::enumerate(*function)) {
138+
BlockState *state = &blockStates[blockAndIdx.index()];
139+
state->block = &blockAndIdx.value();
140+
block2State[&blockAndIdx.value()] = state;
141+
}
142+
}
143+
144+
/// Solve the dataflow in forward direction.
145+
void OptimizeHopToExecutor::solveDataflowForward() {
146+
bool changed = false;
147+
do {
148+
changed = false;
149+
for (BlockState &state : blockStates) {
150+
int newEntry = state.entry;
151+
for (SILBasicBlock *pred : state.block->getPredecessorBlocks()) {
152+
newEntry = BlockState::merge(newEntry, block2State[pred]->exit);
153+
}
154+
if (newEntry != state.entry || state.exit == BlockState::NotSet) {
155+
changed = true;
156+
state.entry = newEntry;
157+
if (state.intra == BlockState::NotSet)
158+
state.exit = state.entry;
159+
}
160+
}
161+
} while (changed);
162+
}
163+
164+
/// Solve the dataflow in backward direction.
165+
void OptimizeHopToExecutor::solveDataflowBackward() {
166+
bool changed = false;
167+
do {
168+
changed = false;
169+
for (BlockState &state : llvm::reverse(blockStates)) {
170+
int newExit = state.exit;
171+
for (SILBasicBlock *succ : state.block->getSuccessorBlocks()) {
172+
newExit = BlockState::merge(newExit, block2State[succ]->entry);
173+
}
174+
if (newExit != state.exit || state.entry == BlockState::NotSet) {
175+
changed = true;
176+
state.exit = newExit;
177+
if (state.intra == BlockState::NotSet)
178+
state.entry = state.exit;
179+
}
180+
}
181+
} while (changed);
182+
}
183+
184+
/// Returns true if \p inst is a suspension point or an async call.
185+
static bool isSuspentionPoint(SILInstruction *inst) {
186+
if (auto applySite = FullApplySite::isa(inst)) {
187+
if (applySite.isAsync())
188+
return true;
189+
return false;
190+
}
191+
if (isa<AwaitAsyncContinuationInst>(inst))
192+
return true;
193+
return false;
194+
}
195+
196+
/// Remove hop_to_executor instructions which are dominated by another
197+
/// hop_to_executor with the same operand.
198+
/// See the top-level comment on OptimizeHopToExecutor for details.
199+
bool OptimizeHopToExecutor::removeRedundantHopToExecutors(const Actors &actors) {
200+
201+
// Initialize the dataflow.
202+
for (BlockState &state : blockStates) {
203+
state.entry = (state.block == function->getEntryBlock() ?
204+
BlockState::Unknown : BlockState::NotSet);
205+
state.intra = BlockState::NotSet;
206+
for (SILInstruction &inst : *state.block) {
207+
if (isSuspentionPoint(&inst)) {
208+
// A suspension point (like an async call) can switch to another
209+
// executor.
210+
state.intra = BlockState::Unknown;
211+
} else if (auto *hop = dyn_cast<HopToExecutorInst>(&inst)) {
212+
state.intra = actors.lookup(hop->getOperand());
213+
}
214+
}
215+
state.exit = state.intra;
216+
}
217+
218+
solveDataflowForward();
219+
220+
// Last step: do the transformation.
221+
bool changed = false;
222+
for (BlockState &state : blockStates) {
223+
// Iterating over all instructions is the same logic as above, just start
224+
// with the final entry-value.
225+
int actorIdx = state.entry;
226+
for (auto iter = state.block->begin(); iter != state.block->end();) {
227+
SILInstruction *inst = &*iter++;
228+
if (isSuspentionPoint(inst)) {
229+
actorIdx = BlockState::Unknown;
230+
continue;
231+
}
232+
if (auto *hop = dyn_cast<HopToExecutorInst>(inst)) {
233+
int newActorIdx = actors.lookup(hop->getOperand());
234+
if (newActorIdx == actorIdx) {
235+
// There is a dominating hop_to_executor with the same operand.
236+
hop->eraseFromParent();
237+
changed = true;
238+
continue;
239+
}
240+
actorIdx = newActorIdx;
241+
continue;
242+
}
243+
}
244+
assert(actorIdx == state.exit);
245+
}
246+
return changed;
247+
}
248+
249+
/// Remove hop_to_executor instructions which are not followed by any code which
250+
/// requires to run on the actor's executor.
251+
/// See the top-level comment on OptimizeHopToExecutor for details.
252+
bool OptimizeHopToExecutor::removeDeadHopToExecutors() {
253+
254+
// Initialize the dataflow: go bottom up and if we see any instruction which
255+
// might require a dedicated executor, don't remove a preceeding
256+
// hop_to_executor instruction.
257+
for (BlockState &state : blockStates) {
258+
state.exit = (state.block->getTerminator()->isFunctionExiting() ?
259+
BlockState::NoExecutorNeeded : BlockState::NotSet);
260+
state.intra = BlockState::NotSet;
261+
for (SILInstruction &inst : llvm::reverse(*state.block)) {
262+
updateNeedExecutor(state.intra, &inst);
263+
}
264+
state.entry = state.intra;
265+
}
266+
267+
solveDataflowBackward();
268+
269+
// Last step: do the transformation.
270+
bool changed = false;
271+
for (BlockState &state : blockStates) {
272+
// Iterating over all instructions is the same logic as above, just start
273+
// with the final exit-value.
274+
int needActor = state.exit;
275+
for (auto iter = state.block->rbegin(); iter != state.block->rend();) {
276+
SILInstruction *inst = &*iter++;
277+
auto *hop = dyn_cast<HopToExecutorInst>(inst);
278+
if (hop && needActor == BlockState::NoExecutorNeeded) {
279+
// Remove the dead hop_to_executor.
280+
hop->eraseFromParent();
281+
changed = true;
282+
continue;
283+
}
284+
updateNeedExecutor(needActor, inst);
285+
}
286+
assert(needActor == state.entry);
287+
}
288+
return changed;
289+
}
290+
291+
/// Updates \p needExecutor for the dataflow evaluation.
292+
void OptimizeHopToExecutor::updateNeedExecutor(int &needExecutor,
293+
SILInstruction *inst) {
294+
if (isa<HopToExecutorInst>(inst)) {
295+
needExecutor = BlockState::NoExecutorNeeded;
296+
return;
297+
}
298+
if (isSuspentionPoint(inst)) {
299+
needExecutor = BlockState::NoExecutorNeeded;
300+
return;
301+
}
302+
if (needsExecutor(inst))
303+
needExecutor = BlockState::ExecutorNeeded;
304+
}
305+
306+
/// Returns true if \p inst needs to run on a specific executor.
307+
bool OptimizeHopToExecutor::needsExecutor(SILInstruction *inst) {
308+
// TODO: Is this the correct thing to check?
309+
if (auto *load = dyn_cast<LoadInst>(inst)) {
310+
return isGlobalMemory(load->getOperand());
311+
}
312+
if (auto *store = dyn_cast<StoreInst>(inst)) {
313+
return isGlobalMemory(store->getDest());
314+
}
315+
if (auto *copy = dyn_cast<CopyAddrInst>(inst)) {
316+
return isGlobalMemory(copy->getSrc()) || isGlobalMemory(copy->getDest());
317+
}
318+
return inst->mayReadOrWriteMemory();
319+
}
320+
321+
/// Returns true unless the access base of \p addr (as computed by
/// getAccessBase) is an alloc_stack, i.e. everything which is not known to be
/// a stack location is treated as global memory.
bool OptimizeHopToExecutor::isGlobalMemory(SILValue addr) {
  // TODO: use escape analysis to rule out locally allocated non-stack objects.
  SILValue accessBase = getAccessBase(addr);
  if (isa<AllocStackInst>(accessBase))
    return false;
  return true;
}
326+
327+
bool OptimizeHopToExecutor::run() {
328+
Actors actors;
329+
collectActors(actors);
330+
if (actors.empty())
331+
return false;
332+
333+
allocateBlockStates();
334+
335+
bool changed = removeRedundantHopToExecutors(actors);
336+
changed |= removeDeadHopToExecutors();
337+
338+
return changed;
339+
}
340+
341+
/// Debug helper: prints one line per block with its debug ID and its entry,
/// intra and exit dataflow values to llvm::dbgs().
LLVM_ATTRIBUTE_USED void OptimizeHopToExecutor::dump() {
  auto &os = llvm::dbgs();
  for (BlockState &bs : blockStates) {
    os << "bb" << bs.block->getDebugID();
    os << ": entry=" << bs.entry;
    os << ", intra=" << bs.intra;
    os << ", exit=" << bs.exit << '\n';
  }
}
349+
350+
class OptimizeHopToExecutorPass : public SILFunctionTransform {
351+
352+
/// The entry point to the transformation.
353+
void run() override {
354+
if (!getFunction()->isAsync())
355+
return;
356+
357+
OptimizeHopToExecutor optimizeHopToExecutor(getFunction());
358+
if (optimizeHopToExecutor.run())
359+
invalidateAnalysis(SILAnalysis::InvalidationKind::Instructions);
360+
}
361+
};
362+
363+
} // end anonymous namespace
364+
365+
/// Creates a new instance of the OptimizeHopToExecutor pass; the returned
/// object is heap-allocated with `new`.
SILTransform *swift::createOptimizeHopToExecutor() {
  SILTransform *pass = new OptimizeHopToExecutorPass();
  return pass;
}

0 commit comments

Comments
 (0)