Commit 18a8ed6

Fix a stack-discipline bug with inlining coroutines.
1 parent 5c03a0a commit 18a8ed6

4 files changed: +253 additions, −3 deletions

include/swift/SILOptimizer/Utils/StackNesting.h

Lines changed: 19 additions & 0 deletions
@@ -20,6 +20,25 @@
 
 namespace swift {
 
+struct InstructionMatchResult {
+  /// The instruction matches.
+  bool matches;
+
+  /// The search should be halted.
+  bool halt;
+};
+
+using InstructionMatcher =
+  llvm::function_ref<InstructionMatchResult(SILInstruction *i)>;
+
+/// Given a predicate defining interesting instructions, check whether
+/// the stack is different at any of them from how it stood at the start
+/// of the search.
+///
+/// The matcher function must halt the search before any edges which would
+/// lead back to before the start point.
+bool hasStackDifferencesAt(SILInstruction *start, InstructionMatcher matcher);
+
 /// A utility to correct the nesting of stack allocating/deallocating
 /// instructions.
 ///
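
For orientation, here is a minimal usage sketch of the new API (an illustration, not part of this commit; the helper name stackUnbalancedAtReturns is hypothetical). The matcher flags every function exit and halts there, so the query reports whether any return is reached with a stack that differs from the entry state:

// Hypothetical client of hasStackDifferencesAt (illustrative only).
#include "swift/SIL/SILFunction.h"
#include "swift/SIL/SILInstruction.h"
#include "swift/SILOptimizer/Utils/StackNesting.h"

using namespace swift;

static bool stackUnbalancedAtReturns(SILFunction *F) {
  return hasStackDifferencesAt(&F->getEntryBlock()->front(),
                               [](SILInstruction *i) -> InstructionMatchResult {
    // Match every return and halt there; no edge leads from a return
    // back to before the entry point, so the matcher contract holds.
    if (isa<ReturnInst>(i))
      return { /*matches*/ true, /*halt*/ true };
    return { false, false };
  });
}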

lib/SILOptimizer/Utils/SILInliner.cpp

Lines changed: 69 additions & 3 deletions
@@ -17,10 +17,54 @@
 #include "swift/SIL/TypeSubstCloner.h"
 #include "swift/SILOptimizer/Utils/CFG.h"
 #include "swift/SILOptimizer/Utils/SILOptFunctionBuilder.h"
+#include "swift/SILOptimizer/Utils/StackNesting.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
 using namespace swift;
 
+/// Does the given coroutine make any stack allocations that are live across
+/// its yields?
+static bool allocatesStackAcrossYields(SILFunction *F) {
+  assert(F->getLoweredFunctionType()->isCoroutine());
+
+  return hasStackDifferencesAt(&F->getEntryBlock()->front(),
+                               [](SILInstruction *i) -> InstructionMatchResult {
+    if (isa<YieldInst>(i)) {
+      return {
+        /*matches*/ true,
+        /*halt*/ i->getFunction()->getLoweredFunctionType()->getCoroutineKind()
+                   == SILCoroutineKind::YieldOnce
+      };
+    }
+
+    // Otherwise, search until the end of the function.
+    return { false, false };
+  });
+}
+
+static bool isEndOfApply(SILInstruction *i, BeginApplyInst *beginApply) {
+  if (auto endApply = dyn_cast<EndApplyInst>(i)) {
+    return endApply->getBeginApply() == beginApply;
+  } else if (auto abortApply = dyn_cast<AbortApplyInst>(i)) {
+    return abortApply->getBeginApply() == beginApply;
+  } else {
+    return false;
+  }
+}
+
+/// Are there any stack differences from the given begin_apply to any
+/// corresponding end_apply/abort_apply?
+static bool hasStackDifferencesAtEnds(BeginApplyInst *apply) {
+  return hasStackDifferencesAt(apply, [apply](SILInstruction *i)
+                                        -> InstructionMatchResult {
+    // Search for ends of the original apply. We can stop searching
+    // at these points.
+    if (isEndOfApply(i, apply))
+      return { true, true };
+    return { false, false };
+  });
+}
+
 static bool canInlineBeginApply(BeginApplyInst *BA) {
   // Don't inline if we have multiple resumption sites (i.e. end_apply or
   // abort_apply instructions). The current implementation clones a single
@@ -75,6 +119,7 @@ class BeginApplySite {
   SILBuilder *Builder;
   BeginApplyInst *BeginApply;
   bool HasYield = false;
+  bool NeedsStackCorrection;
 
   EndApplyInst *EndApply = nullptr;
   SILBasicBlock *EndApplyBB = nullptr;
@@ -86,15 +131,31 @@ class BeginApplySite {
 
 public:
   BeginApplySite(BeginApplyInst *BeginApply, SILLocation Loc,
-                 SILBuilder *Builder)
-      : Loc(Loc), Builder(Builder), BeginApply(BeginApply) {}
+                 SILBuilder *Builder, bool NeedsStackCorrection)
+      : Loc(Loc), Builder(Builder), BeginApply(BeginApply),
+        NeedsStackCorrection(NeedsStackCorrection) {}
 
   static Optional<BeginApplySite> get(FullApplySite AI, SILLocation Loc,
                                       SILBuilder *Builder) {
     auto *BeginApply = dyn_cast<BeginApplyInst>(AI);
     if (!BeginApply)
       return None;
-    return BeginApplySite(BeginApply, Loc, Builder);
+
+    // We need stack correction if there are both:
+    //   - stack allocations in the callee that are live across the yield and
+    //   - stack differences in the caller from the begin_apply to any
+    //     end_apply or abort_apply.
+    // In these cases, naive cloning will cause the allocations to become
+    // improperly nested.
+    //
+    // We need to compute this here before we do any splitting in the parent
+    // function.
+    bool NeedsStackCorrection = false;
+    if (allocatesStackAcrossYields(BeginApply->getReferencedFunction()) &&
+        hasStackDifferencesAtEnds(BeginApply))
+      NeedsStackCorrection = true;
+
+    return BeginApplySite(BeginApply, Loc, Builder, NeedsStackCorrection);
   }
 
   void preprocess(SILBasicBlock *returnToBB) {
@@ -222,6 +283,11 @@ class BeginApplySite {
     AbortApply->eraseFromParent();
 
     assert(!BeginApply->hasUsesOfAnyResult());
+
+    // Correct the stack if necessary.
+    if (NeedsStackCorrection) {
+      StackNesting().correctStackNesting(BeginApply->getFunction());
+    }
   }
 };
 } // namespace swift
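
To see concretely why both conditions are needed, consider the event order exercised by test_stack_overlap_dealloc at the end of this commit: after naive inlining, the caller deallocates its own allocation while the callee's yield-crossing allocation is still open, violating LIFO order. Here is a standalone sketch (plain C++, not compiler code; the names A16/A32 mirror the test) that checks stack discipline over such sequences:

#include <cassert>
#include <string>
#include <utility>
#include <vector>

// True iff every deallocation removes the most recently allocated
// still-live slot, i.e. the sequence obeys LIFO stack discipline.
static bool obeysStackDiscipline(
    const std::vector<std::pair<char, std::string>> &events) {
  std::vector<std::string> stack;
  for (const auto &ev : events) {
    if (ev.first == '+') {
      stack.push_back(ev.second);
    } else {
      if (stack.empty() || stack.back() != ev.second)
        return false; // deallocating something that is not on top
      stack.pop_back();
    }
  }
  return true;
}

int main() {
  // Naive inlining: the caller's A16 is deallocated while the callee's
  // A32, which lives across the yield, is still open.
  assert(!obeysStackDiscipline(
      {{'+', "A16"}, {'+', "A32"}, {'-', "A16"}, {'-', "A32"}}));
  // After StackNesting().correctStackNesting, the deallocations are
  // re-nested so the sequence is well formed.
  assert(obeysStackDiscipline(
      {{'+', "A16"}, {'+', "A32"}, {'-', "A32"}, {'-', "A16"}}));
  return 0;
}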

lib/SILOptimizer/Utils/StackNesting.cpp

Lines changed: 69 additions & 0 deletions
@@ -18,6 +18,75 @@
 
 using namespace swift;
 
+bool swift::hasStackDifferencesAt(SILInstruction *start,
+                                  InstructionMatcher matcher) {
+  struct StackStatus {
+    /// The number of currently-active stack allocations on this path.
+    /// Because of the stack discipline, we only have to maintain a count.
+    unsigned depth;
+
+    /// Whether we've popped anything off of the stack that was active
+    /// at the start of the search.
+    bool hasPops;
+
+    bool hasDifferences() const { return hasPops || depth != 0; }
+  };
+
+  SmallPtrSet<SILBasicBlock*, 8> visited;
+  SmallVector<std::pair<SILInstruction *, StackStatus>, 8> worklist;
+  worklist.push_back({start, {0, false}});
+
+  while (!worklist.empty()) {
+    // Pull a block and depth off the worklist.
+    // Visitation order doesn't matter.
+    auto pair = worklist.pop_back_val();
+    auto status = pair.second;
+    auto firstInst = pair.first;
+    auto block = firstInst->getParent();
+
+    for (SILBasicBlock::iterator i(firstInst), e = block->end(); i != e; ++i) {
+      auto match = matcher(&*i);
+
+      // If this is an interesting instruction, check for stack
+      // differences here.
+      if (match.matches && status.hasDifferences())
+        return true;
+
+      // If we should halt, just go to the next work-list item, bypassing
+      // visiting the successors.
+      if (match.halt)
+        goto nextWorkListItem; // a labelled continue would be nice
+
+      // Otherwise, do stack-depth bookkeeping.
+      if (i->isAllocatingStack()) {
+        status.depth++;
+      } else if (i->isDeallocatingStack()) {
+        // If the stack depth is already zero, we're deallocating a stack
+        // allocation that was live into the search.
+        if (status.depth > 0) {
+          status.depth--;
+        } else {
+          status.hasPops = true;
+        }
+      }
+    }
+
+    // Add the successors to the worklist.
+    for (auto &succ : block->getSuccessors()) {
+      auto succBlock = succ.getBB();
+      auto insertResult = visited.insert(succBlock);
+      if (insertResult.second) {
+        worklist.push_back({&succBlock->front(), status});
+      }
+    }
+
+  nextWorkListItem: ;
+  }
+
+  return false;
+}
+
 void StackNesting::setup(SILFunction *F) {
   SmallVector<BlockInfo *, 8> WorkList;
   llvm::DenseMap<SILBasicBlock *, BlockInfo *> BlockMapping;
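
The invariant driving the walk above: a path shows a stack difference iff it has made net allocations (depth != 0) or has popped an allocation that was live before the search began (hasPops). A standalone sketch of that bookkeeping over a straight-line event sequence (plain C++, illustrative only):

#include <cassert>
#include <vector>

enum class Event { Push, Pop };

struct StackStatus {
  unsigned depth = 0;   // allocations made since the start of the walk
  bool hasPops = false; // popped an allocation that predates the walk
  bool hasDifferences() const { return hasPops || depth != 0; }
};

static StackStatus walk(const std::vector<Event> &events) {
  StackStatus status;
  for (Event e : events) {
    if (e == Event::Push) {
      status.depth++;
    } else if (status.depth > 0) {
      status.depth--;          // popping one of our own allocations
    } else {
      status.hasPops = true;   // popping a pre-existing allocation
    }
  }
  return status;
}

int main() {
  // Balanced push/pop: no difference at the end of the walk.
  assert(!walk({Event::Push, Event::Pop}).hasDifferences());
  // A net allocation is a difference.
  assert(walk({Event::Push}).hasDifferences());
  // Popping an allocation that was live at the start is a difference
  // even though depth never goes above zero.
  assert(walk({Event::Pop}).hasDifferences());
  return 0;
}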

test/SILOptimizer/inline_begin_apply.sil

Lines changed: 96 additions & 0 deletions
@@ -436,3 +436,99 @@ entry:
   %ret = tuple ()
   return %ret : $()
 }
+
+sil [transparent] @stack_overlap : $@convention(thin) @yield_once () -> (@yields @inout Builtin.Int32) {
+entry:
+  %marker = function_ref @marker : $@convention(thin) (Builtin.Int32) -> ()
+  %temp = alloc_stack $Builtin.Int32
+  %1000 = integer_literal $Builtin.Int32, 1000
+  store %1000 to [trivial] %temp : $*Builtin.Int32
+  %2000 = integer_literal $Builtin.Int32, 2000
+  apply %marker(%2000) : $@convention(thin) (Builtin.Int32) -> ()
+  yield %temp : $*Builtin.Int32, resume resume, unwind unwind
+
+resume:
+  %3000 = integer_literal $Builtin.Int32, 3000
+  apply %marker(%3000) : $@convention(thin) (Builtin.Int32) -> ()
+  dealloc_stack %temp : $*Builtin.Int32
+  %4000 = integer_literal $Builtin.Int32, 4000
+  apply %marker(%4000) : $@convention(thin) (Builtin.Int32) -> ()
+  %ret = tuple ()
+  return %ret : $()
+
+unwind:
+  dealloc_stack %temp : $*Builtin.Int32
+  unwind
+}
+
+// CHECK-LABEL: sil @test_stack_overlap_dealloc
+// CHECK: bb0:
+// CHECK-NEXT: [[A16:%.*]] = alloc_stack $Builtin.Int16
+// CHECK-NEXT: // function_ref
+// CHECK-NEXT: [[MARKER:%.*]] = function_ref @marker
+// CHECK-NEXT: [[A32:%.*]] = alloc_stack $Builtin.Int32
+// CHECK-NEXT: [[I1000:%.*]] = integer_literal $Builtin.Int32, 1000
+// CHECK-NEXT: store [[I1000]] to [trivial] [[A32]]
+// CHECK-NEXT: [[I2000:%.*]] = integer_literal $Builtin.Int32, 2000
+// CHECK-NEXT: apply [[MARKER]]([[I2000]])
+// CHECK-NEXT: br bb1
+// CHECK: bb1:
+// CHECK-NEXT: [[I3000:%.*]] = integer_literal $Builtin.Int32, 3000
+// CHECK-NEXT: apply [[MARKER]]([[I3000]])
+// CHECK-NEXT: dealloc_stack [[A32]] : $*Builtin.Int32
+// Note that this has been delayed to follow stack discipline.
+// CHECK-NEXT: dealloc_stack [[A16]] : $*Builtin.Int16
+// CHECK-NEXT: [[I4000:%.*]] = integer_literal $Builtin.Int32, 4000
+// CHECK-NEXT: apply [[MARKER]]([[I4000]])
+// CHECK-NEXT: tuple ()
+// CHECK-NEXT: br [[END:bb[0-9]+]]
+// CHECK: [[END]]:
+// CHECK-NEXT: [[RET:%.*]] = tuple ()
+// CHECK-NEXT: return [[RET]] : $()
+// CHECK-LABEL: // end sil function 'test_stack_overlap_dealloc'
+sil @test_stack_overlap_dealloc : $() -> () {
+bb0:
+  %stack = alloc_stack $Builtin.Int16
+  %0 = function_ref @stack_overlap : $@convention(thin) @yield_once () -> (@yields @inout Builtin.Int32)
+  (%value, %token) = begin_apply %0() : $@convention(thin) @yield_once () -> (@yields @inout Builtin.Int32)
+  dealloc_stack %stack : $*Builtin.Int16
+  end_apply %token
+  %ret = tuple ()
+  return %ret : $()
+}
+
+// CHECK-LABEL: sil @test_stack_overlap_alloc
+// CHECK: bb0:
+// CHECK-NEXT: // function_ref
+// CHECK-NEXT: [[MARKER:%.*]] = function_ref @marker
+// CHECK-NEXT: [[A32:%.*]] = alloc_stack $Builtin.Int32
+// CHECK-NEXT: [[I1000:%.*]] = integer_literal $Builtin.Int32, 1000
+// CHECK-NEXT: store [[I1000]] to [trivial] [[A32]]
+// CHECK-NEXT: [[I2000:%.*]] = integer_literal $Builtin.Int32, 2000
+// CHECK-NEXT: apply [[MARKER]]([[I2000]])
+// CHECK-NEXT: [[A16:%.*]] = alloc_stack $Builtin.Int16
+// CHECK-NEXT: br bb1
+// CHECK: bb1:
+// CHECK-NEXT: [[I3000:%.*]] = integer_literal $Builtin.Int32, 3000
+// CHECK-NEXT: apply [[MARKER]]([[I3000]])
+// CHECK-NEXT: [[I4000:%.*]] = integer_literal $Builtin.Int32, 4000
+// CHECK-NEXT: apply [[MARKER]]([[I4000]])
+// CHECK-NEXT: tuple ()
+// CHECK-NEXT: br [[END:bb[0-9]+]]
+// CHECK: [[END]]:
+// CHECK-NEXT: dealloc_stack [[A16]] : $*Builtin.Int16
+// Note that this has been delayed to follow stack discipline.
+// CHECK-NEXT: dealloc_stack [[A32]] : $*Builtin.Int32
+// CHECK-NEXT: [[RET:%.*]] = tuple ()
+// CHECK-NEXT: return [[RET]] : $()
+// CHECK-LABEL: // end sil function 'test_stack_overlap_alloc'
+sil @test_stack_overlap_alloc : $() -> () {
+bb0:
+  %0 = function_ref @stack_overlap : $@convention(thin) @yield_once () -> (@yields @inout Builtin.Int32)
+  (%value, %token) = begin_apply %0() : $@convention(thin) @yield_once () -> (@yields @inout Builtin.Int32)
+  %stack = alloc_stack $Builtin.Int16
+  end_apply %token
+  dealloc_stack %stack : $*Builtin.Int16
+  %ret = tuple ()
+  return %ret : $()
+}
