@@ -960,6 +960,155 @@ static void splitAround(Instruction *I, const Twine &Name) {
960
960
splitBlockIfNotFirst (I->getNextNode (), " After" + Name);
961
961
}
962
962
963
+ static bool isSuspendBlock (BasicBlock *BB) {
964
+ return isa<AnyCoroSuspendInst>(BB->front ());
965
+ }
966
+
967
+ typedef SmallPtrSet<BasicBlock*, 8 > VisitedBlocksSet;
968
+
969
+ // / Does control flow starting at the given block ever reach a suspend
970
+ // / instruction before reaching a block in VisitedOrFreeBBs?
971
+ static bool isSuspendReachableFrom (BasicBlock *From,
972
+ VisitedBlocksSet &VisitedOrFreeBBs) {
973
+ // Eagerly try to add this block to the visited set. If it's already
974
+ // there, stop recursing; this path doesn't reach a suspend before
975
+ // either looping or reaching a freeing block.
976
+ if (!VisitedOrFreeBBs.insert (From).second )
977
+ return false ;
978
+
979
+ // We assume that we'll already have split suspends into their own blocks.
980
+ if (isSuspendBlock (From))
981
+ return true ;
982
+
983
+ // Recurse on the successors.
984
+ for (auto Succ : successors (From)) {
985
+ if (isSuspendReachableFrom (Succ, VisitedOrFreeBBs))
986
+ return true ;
987
+ }
988
+
989
+ return false ;
990
+ }
991
+
992
+ // / Is the given alloca "local", i.e. bounded in lifetime to not cross a
993
+ // / suspend point?
994
+ static bool isLocalAlloca (CoroAllocaAllocInst *AI) {
995
+ // Seed the visited set with all the basic blocks containing a free
996
+ // so that we won't pass them up.
997
+ VisitedBlocksSet VisitedOrFreeBBs;
998
+ for (auto User : AI->users ()) {
999
+ if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
1000
+ VisitedOrFreeBBs.insert (FI->getParent ());
1001
+ }
1002
+
1003
+ return !isSuspendReachableFrom (AI->getParent (), VisitedOrFreeBBs);
1004
+ }
1005
+
1006
+ // / After we split the coroutine, will the given basic block be along
1007
+ // / an obvious exit path for the resumption function?
1008
+ static bool willLeaveFunctionImmediatelyAfter (BasicBlock *BB,
1009
+ unsigned depth = 3 ) {
1010
+ // If we've bottomed out our depth count, stop searching and assume
1011
+ // that the path might loop back.
1012
+ if (depth == 0 ) return false ;
1013
+
1014
+ // If this is a suspend block, we're about to exit the resumption function.
1015
+ if (isSuspendBlock (BB)) return true ;
1016
+
1017
+ // Recurse into the successors.
1018
+ for (auto Succ : successors (BB)) {
1019
+ if (!willLeaveFunctionImmediatelyAfter (Succ, depth - 1 ))
1020
+ return false ;
1021
+ }
1022
+
1023
+ // If none of the successors leads back in a loop, we're on an exit/abort.
1024
+ return true ;
1025
+ }
1026
+
1027
+ static bool localAllocaNeedsStackSave (CoroAllocaAllocInst *AI) {
1028
+ // Look for a free that isn't sufficiently obviously followed by
1029
+ // either a suspend or a termination, i.e. something that will leave
1030
+ // the coro resumption frame.
1031
+ for (auto U : AI->users ()) {
1032
+ auto FI = dyn_cast<CoroAllocaFreeInst>(U);
1033
+ if (!FI) continue ;
1034
+
1035
+ if (!willLeaveFunctionImmediatelyAfter (FI->getParent ()))
1036
+ return true ;
1037
+ }
1038
+
1039
+ // If we never found one, we don't need a stack save.
1040
+ return false ;
1041
+ }
1042
+
1043
+ // / Turn each of the given local allocas into a normal (dynamic) alloca
1044
+ // / instruction.
1045
+ static void lowerLocalAllocas (ArrayRef<CoroAllocaAllocInst*> LocalAllocas) {
1046
+ for (auto AI : LocalAllocas) {
1047
+ auto M = AI->getModule ();
1048
+ IRBuilder<> Builder (AI);
1049
+
1050
+ // Save the stack depth. Try to avoid doing this if the stackrestore
1051
+ // is going to immediately precede a return or something.
1052
+ Value *StackSave = nullptr ;
1053
+ if (localAllocaNeedsStackSave (AI))
1054
+ StackSave = Builder.CreateCall (
1055
+ Intrinsic::getDeclaration (M, Intrinsic::stacksave));
1056
+
1057
+ // Allocate memory.
1058
+ auto Alloca = Builder.CreateAlloca (Builder.getInt8Ty (), AI->getSize ());
1059
+ Alloca->setAlignment (AI->getAlignment ());
1060
+
1061
+ for (auto U : AI->users ()) {
1062
+ // Replace gets with the allocation.
1063
+ if (isa<CoroAllocaGetInst>(U)) {
1064
+ U->replaceAllUsesWith (Alloca);
1065
+
1066
+ // Replace frees with stackrestores. This is safe because
1067
+ // alloca.alloc is required to obey a stack discipline, although we
1068
+ // don't enforce that structurally.
1069
+ } else {
1070
+ auto FI = cast<CoroAllocaFreeInst>(U);
1071
+ if (StackSave) {
1072
+ Builder.SetInsertPoint (FI);
1073
+ Builder.CreateCall (
1074
+ Intrinsic::getDeclaration (M, Intrinsic::stackrestore),
1075
+ StackSave);
1076
+ }
1077
+ }
1078
+ cast<Instruction>(U)->eraseFromParent ();
1079
+ }
1080
+
1081
+ AI->eraseFromParent ();
1082
+ }
1083
+ }
1084
+
1085
+ // / Turn the given coro.alloca.alloc call into a dynamic allocation.
1086
+ // / This happens during the all-instructions iteration, so it must not
1087
+ // / delete the call.
1088
+ static Instruction *lowerNonLocalAlloca (CoroAllocaAllocInst *AI,
1089
+ coro::Shape &Shape,
1090
+ SmallVectorImpl<Instruction*> &DeadInsts) {
1091
+ IRBuilder<> Builder (AI);
1092
+ auto Alloc = Shape.emitAlloc (Builder, AI->getSize (), nullptr );
1093
+
1094
+ for (User *U : AI->users ()) {
1095
+ if (isa<CoroAllocaGetInst>(U)) {
1096
+ U->replaceAllUsesWith (Alloc);
1097
+ } else {
1098
+ auto FI = cast<CoroAllocaFreeInst>(U);
1099
+ Builder.SetInsertPoint (FI);
1100
+ Shape.emitDealloc (Builder, Alloc, nullptr );
1101
+ }
1102
+ DeadInsts.push_back (cast<Instruction>(U));
1103
+ }
1104
+
1105
+ // Push this on last so that it gets deleted after all the others.
1106
+ DeadInsts.push_back (AI);
1107
+
1108
+ // Return the new allocation value so that we can check for needed spills.
1109
+ return cast<Instruction>(Alloc);
1110
+ }
1111
+
963
1112
void coro::buildCoroutineFrame (Function &F, Shape &Shape) {
964
1113
// Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite
965
1114
// access to local variables.
@@ -992,6 +1141,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
992
1141
993
1142
IRBuilder<> Builder (F.getContext ());
994
1143
SpillInfo Spills;
1144
+ SmallVector<CoroAllocaAllocInst*, 4 > LocalAllocas;
1145
+ SmallVector<Instruction*, 4 > DeadInstructions;
995
1146
996
1147
for (int Repeat = 0 ; Repeat < 4 ; ++Repeat) {
997
1148
// See if there are materializable instructions across suspend points.
@@ -1021,12 +1172,35 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
1021
1172
// of the Coroutine Frame.
1022
1173
if (isCoroutineStructureIntrinsic (I) || &I == Shape.CoroBegin )
1023
1174
continue ;
1175
+
1024
1176
// The Coroutine Promise always included into coroutine frame, no need to
1025
1177
// check for suspend crossing.
1026
1178
if (Shape.ABI == coro::ABI::Switch &&
1027
1179
Shape.SwitchLowering .PromiseAlloca == &I)
1028
1180
continue ;
1029
1181
1182
+ // Handle alloca.alloc specially here.
1183
+ if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
1184
+ // Check whether the alloca's lifetime is bounded by suspend points.
1185
+ if (isLocalAlloca (AI)) {
1186
+ LocalAllocas.push_back (AI);
1187
+ continue ;
1188
+ }
1189
+
1190
+ // If not, do a quick rewrite of the alloca and then add spills of
1191
+ // the rewritten value. The rewrite doesn't invalidate anything in
1192
+ // Spills because the other alloca intrinsics have no other operands
1193
+ // besides AI, and it doesn't invalidate the iteration because we delay
1194
+ // erasing AI.
1195
+ auto Alloc = lowerNonLocalAlloca (AI, Shape, DeadInstructions);
1196
+
1197
+ for (User *U : Alloc->users ()) {
1198
+ if (Checker.isDefinitionAcrossSuspend (*Alloc, U))
1199
+ Spills.emplace_back (Alloc, U);
1200
+ }
1201
+ continue ;
1202
+ }
1203
+
1030
1204
for (User *U : I.users ())
1031
1205
if (Checker.isDefinitionAcrossSuspend (I, U)) {
1032
1206
// We cannot spill a token.
@@ -1040,4 +1214,8 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
1040
1214
moveSpillUsesAfterCoroBegin (F, Spills, Shape.CoroBegin );
1041
1215
Shape.FrameTy = buildFrameType (F, Shape, Spills);
1042
1216
Shape.FramePtr = insertSpills (Spills, Shape);
1217
+ lowerLocalAllocas (LocalAllocas);
1218
+
1219
+ for (auto I : DeadInstructions)
1220
+ I->eraseFromParent ();
1043
1221
}
0 commit comments