Skip to content

[Coroutines] Improve debugging and minor refactoring #104642

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Coroutines/CoroEarly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ void Lowerer::lowerEarlyIntrinsics(Function &F) {
if (auto *CII = cast<CoroIdInst>(&I)) {
if (CII->getInfo().isPreSplit()) {
assert(F.isPresplitCoroutine() &&
"The frontend uses Swtich-Resumed ABI should emit "
"The frontend uses Switch-Resumed ABI should emit "
"\"presplitcoroutine\" attribute for the coroutine.");
setCannotDuplicate(CII);
CII->setCoroutineSelf();
Expand Down
163 changes: 98 additions & 65 deletions llvm/lib/Transforms/Coroutines/CoroFrame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
// This file contains classes used to discover if for a particular value
// there from sue to definition that crosses a suspend block.
// its definition precedes and its uses follow a suspend block. This is
// referred to as a suspend crossing value.
//
// Using the information discovered we form a Coroutine Frame structure to
// contain those values. All uses of those values are replaced with appropriate
Expand Down Expand Up @@ -124,7 +125,8 @@ class SuspendCrossingInfo {
public:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump() const;
void dump(StringRef Label, BitVector const &BV) const;
void dump(StringRef Label, BitVector const &BV,
const ReversePostOrderTraversal<Function *> &RPOT) const;
#endif

SuspendCrossingInfo(Function &F, coro::Shape &Shape);
Expand Down Expand Up @@ -207,21 +209,41 @@ class SuspendCrossingInfo {
} // end anonymous namespace

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
BitVector const &BV) const {
static std::string getBasicBlockLabel(const BasicBlock *BB) {
if (BB->hasName())
return BB->getName().str();

std::string S;
raw_string_ostream OS(S);
BB->printAsOperand(OS, false);
return OS.str().substr(1);
}

LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(
StringRef Label, BitVector const &BV,
const ReversePostOrderTraversal<Function *> &RPOT) const {
dbgs() << Label << ":";
for (size_t I = 0, N = BV.size(); I < N; ++I)
if (BV[I])
dbgs() << " " << Mapping.indexToBlock(I)->getName();
for (const BasicBlock *BB : RPOT) {
auto BBNo = Mapping.blockToIndex(BB);
if (BV[BBNo])
dbgs() << " " << getBasicBlockLabel(BB);
}
dbgs() << "\n";
}

LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
for (size_t I = 0, N = Block.size(); I < N; ++I) {
BasicBlock *const B = Mapping.indexToBlock(I);
dbgs() << B->getName() << ":\n";
dump(" Consumes", Block[I].Consumes);
dump(" Kills", Block[I].Kills);
if (Block.empty())
return;

BasicBlock *const B = Mapping.indexToBlock(0);
Function *F = B->getParent();

ReversePostOrderTraversal<Function *> RPOT(F);
for (const BasicBlock *BB : RPOT) {
auto BBNo = Mapping.blockToIndex(BB);
dbgs() << getBasicBlockLabel(BB) << ":\n";
dump(" Consumes", Block[BBNo].Consumes, RPOT);
dump(" Kills", Block[BBNo].Kills, RPOT);
}
dbgs() << "\n";
}
Expand Down Expand Up @@ -418,10 +440,7 @@ struct RematGraph {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump() const {
dbgs() << "Entry (";
if (EntryNode->Node->getParent()->hasName())
dbgs() << EntryNode->Node->getParent()->getName();
else
EntryNode->Node->getParent()->printAsOperand(dbgs(), false);
dbgs() << getBasicBlockLabel(EntryNode->Node->getParent());
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@yuxuanchen1997 I restricted use of getBasicBlockName to dump methods. Please notice that similar code already appeared in the dump method for RematGraph.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My only concern was the unnecessary copy of basic block name. You can do something with a macro like

#define BB_NAME(BB) (BB->hasName() ? BB->getName() : StringRef(getBasicBlockLabel(BB)))

This uses lifetime extension rules to make the latter remain allocated while used as a temporary.
But this is not just ugly, but also unsafe to be stored to a variable. A better way to do this is through a type similar to a tagged union of StringRef and std::string.

There is probably no point in optimizing this for a single dump method. With these reasons, I am not against landing this now. But since you mentioned that RematGraph is also using similar patterns, there might be motivation to solve it systematically. (e.g. BasicBlock::getDebugName())

Copy link
Collaborator Author

@TylerNowicki TylerNowicki Sep 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI I have a change queued up to eliminate the str copies

TylerNowicki#8

dbgs() << ") : " << *EntryNode->Node << "\n";
for (auto &E : Remats) {
dbgs() << *(E.first) << "\n";
Expand Down Expand Up @@ -551,7 +570,7 @@ struct FrameDataInfo {

#ifndef NDEBUG
static void dumpSpills(StringRef Title, const SpillInfo &Spills) {
dbgs() << "------------- " << Title << "--------------\n";
dbgs() << "------------- " << Title << " --------------\n";
for (const auto &E : Spills) {
E.first->dump();
dbgs() << " user: ";
Expand Down Expand Up @@ -813,7 +832,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
StackLifetime::LivenessType::May);
StackLifetimeAnalyzer.run();
auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
auto DoAllocasInterfere = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
StackLifetimeAnalyzer.getLiveRange(AI2));
};
Expand All @@ -833,13 +852,13 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
for (const auto &A : FrameData.Allocas) {
AllocaInst *Alloca = A.Alloca;
bool Merged = false;
// Try to find if the Alloca is not inferenced with any existing
// Try to find if the Alloca does not interfere with any existing
// NonOverlappedAllocaSet. If it is true, insert the alloca to that
// NonOverlappedAllocaSet.
for (auto &AllocaSet : NonOverlapedAllocas) {
assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
bool NoInference = none_of(AllocaSet, [&](auto Iter) {
return IsAllocaInferenre(Alloca, Iter);
bool NoInterference = none_of(AllocaSet, [&](auto Iter) {
return DoAllocasInterfere(Alloca, Iter);
});
// If the alignment of A is multiple of the alignment of B, the address
// of A should satisfy the requirement for aligning for B.
Expand All @@ -852,7 +871,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
0;
}();
bool CouldMerge = NoInference && Alignable;
bool CouldMerge = NoInterference && Alignable;
if (!CouldMerge)
continue;
AllocaSet.push_back(Alloca);
Expand Down Expand Up @@ -1713,6 +1732,51 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
return CleanupRet;
}

static BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape,
Value *Def,
const DominatorTree &DT) {
BasicBlock::iterator InsertPt;
if (auto *Arg = dyn_cast<Argument>(Def)) {
// For arguments, we will place the store instruction right after
// the coroutine frame pointer instruction, i.e. coro.begin.
InsertPt = Shape.getInsertPtAfterFramePtr();

// If we're spilling an Argument, make sure we clear 'nocapture'
// from the coroutine function.
Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
} else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
// Don't spill immediately after a suspend; splitting assumes
// that the suspend will be followed by a branch.
InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHIIt();
} else {
auto *I = cast<Instruction>(Def);
if (!DT.dominates(Shape.CoroBegin, I)) {
// If it is not dominated by CoroBegin, then spill should be
// inserted immediately after CoroFrame is computed.
InsertPt = Shape.getInsertPtAfterFramePtr();
} else if (auto *II = dyn_cast<InvokeInst>(I)) {
// If we are spilling the result of the invoke instruction, split
// the normal edge and insert the spill in the new block.
auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
InsertPt = NewBB->getTerminator()->getIterator();
} else if (isa<PHINode>(I)) {
// Skip the PHINodes and EH pads instructions.
BasicBlock *DefBlock = I->getParent();
if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
InsertPt = splitBeforeCatchSwitch(CSI)->getIterator();
else
InsertPt = DefBlock->getFirstInsertionPt();
} else {
assert(!I->isTerminator() && "unexpected terminator");
// For all other values, the spill is placed immediately after
// the definition.
InsertPt = I->getNextNode()->getIterator();
}
}

return InsertPt;
}

// Replace all alloca and SSA values that are accessed across suspend points
// with GetElementPointer from coroutine frame + loads and stores. Create an
// AllocaSpillBB that will become the new entry block for the resume parts of
Expand All @@ -1735,9 +1799,8 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
//
//
static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
auto *CB = Shape.CoroBegin;
LLVMContext &C = CB->getContext();
Function *F = CB->getFunction();
LLVMContext &C = Shape.CoroBegin->getContext();
Function *F = Shape.CoroBegin->getFunction();
IRBuilder<> Builder(C);
StructType *FrameTy = Shape.FrameTy;
Value *FramePtr = Shape.FramePtr;
Expand Down Expand Up @@ -1798,47 +1861,16 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
auto SpillAlignment = Align(FrameData.getAlign(Def));
// Create a store instruction storing the value into the
// coroutine frame.
BasicBlock::iterator InsertPt;
BasicBlock::iterator InsertPt = getSpillInsertionPt(Shape, Def, DT);

Type *ByValTy = nullptr;
if (auto *Arg = dyn_cast<Argument>(Def)) {
// For arguments, we will place the store instruction right after
// the coroutine frame pointer instruction, i.e. coro.begin.
InsertPt = Shape.getInsertPtAfterFramePtr();

// If we're spilling an Argument, make sure we clear 'nocapture'
// from the coroutine function.
Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);

if (Arg->hasByValAttr())
ByValTy = Arg->getParamByValType();
} else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
// Don't spill immediately after a suspend; splitting assumes
// that the suspend will be followed by a branch.
InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHIIt();
} else {
auto *I = cast<Instruction>(Def);
if (!DT.dominates(CB, I)) {
// If it is not dominated by CoroBegin, then spill should be
// inserted immediately after CoroFrame is computed.
InsertPt = Shape.getInsertPtAfterFramePtr();
} else if (auto *II = dyn_cast<InvokeInst>(I)) {
// If we are spilling the result of the invoke instruction, split
// the normal edge and insert the spill in the new block.
auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
InsertPt = NewBB->getTerminator()->getIterator();
} else if (isa<PHINode>(I)) {
// Skip the PHINodes and EH pads instructions.
BasicBlock *DefBlock = I->getParent();
if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
InsertPt = splitBeforeCatchSwitch(CSI)->getIterator();
else
InsertPt = DefBlock->getFirstInsertionPt();
} else {
assert(!I->isTerminator() && "unexpected terminator");
// For all other values, the spill is placed immediately after
// the definition.
InsertPt = I->getNextNode()->getIterator();
}
}

auto Index = FrameData.getFieldIndex(Def);
Expand Down Expand Up @@ -1980,7 +2012,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
UsersToUpdate.clear();
for (User *U : Alloca->users()) {
auto *I = cast<Instruction>(U);
if (DT.dominates(CB, I))
if (DT.dominates(Shape.CoroBegin, I))
UsersToUpdate.push_back(I);
}
if (UsersToUpdate.empty())
Expand Down Expand Up @@ -2022,7 +2054,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
Builder.CreateStore(Value, G);
}
// For each alias to Alloca created before CoroBegin but used after
// CoroBegin, we recreate them after CoroBegin by appplying the offset
// CoroBegin, we recreate them after CoroBegin by applying the offset
// to the pointer in the frame.
for (const auto &Alias : A.Aliases) {
auto *FramePtr = GetFramePointer(Alloca);
Expand All @@ -2031,7 +2063,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
auto *AliasPtr =
Builder.CreatePtrAdd(FramePtr, ConstantInt::get(ITy, Value));
Alias.first->replaceUsesWithIf(
AliasPtr, [&](Use &U) { return DT.dominates(CB, U); });
AliasPtr, [&](Use &U) { return DT.dominates(Shape.CoroBegin, U); });
}
}

Expand All @@ -2044,7 +2076,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
// If there is memory accessing to promise alloca before CoroBegin;
bool HasAccessingPromiseBeforeCB = llvm::any_of(PA->uses(), [&](Use &U) {
auto *Inst = dyn_cast<Instruction>(U.getUser());
if (!Inst || DT.dominates(CB, Inst))
if (!Inst || DT.dominates(Shape.CoroBegin, Inst))
return false;

if (auto *CI = dyn_cast<CallInst>(Inst)) {
Expand Down Expand Up @@ -2690,7 +2722,7 @@ static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
}
}

/// retcon and retcon.once conventions assume that all spill uses can be sunk
/// Async and Retcon{Once} conventions assume that all spill uses can be sunk
/// after the coro.begin intrinsic.
static void sinkSpillUsesAfterCoroBegin(Function &F,
const FrameDataInfo &FrameData,
Expand Down Expand Up @@ -2726,7 +2758,7 @@ static void sinkSpillUsesAfterCoroBegin(Function &F,
// Sort by dominance.
SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool {
// If a dominates b it should preceed (<) b.
// If a dominates b it should precede (<) b.
return Dom.dominates(A, B);
});

Expand Down Expand Up @@ -3122,7 +3154,7 @@ void coro::buildCoroutineFrame(
cleanupSinglePredPHIs(F);

// Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
// never has its definition separated from the PHI by the suspend point.
// never have its definition separated from the PHI by the suspend point.
rewritePHIs(F);

// Build suspend crossing info.
Expand Down Expand Up @@ -3219,6 +3251,7 @@ void coro::buildCoroutineFrame(
Shape.FramePtr = Shape.CoroBegin;
// For now, this works for C++ programs only.
buildFrameDebugInfo(F, Shape, FrameData);
// Insert spills and reloads
insertSpills(FrameData, Shape);
lowerLocalAllocas(LocalAllocas, DeadInstructions);

Expand Down
9 changes: 4 additions & 5 deletions llvm/lib/Transforms/Coroutines/CoroSplit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1219,11 +1219,10 @@ static void postSplitCleanup(Function &F) {
// frame if possible.
static void handleNoSuspendCoroutine(coro::Shape &Shape) {
auto *CoroBegin = Shape.CoroBegin;
auto *CoroId = CoroBegin->getId();
auto *AllocInst = CoroId->getCoroAlloc();
switch (Shape.ABI) {
case coro::ABI::Switch: {
auto SwitchId = cast<CoroIdInst>(CoroId);
auto SwitchId = Shape.getSwitchCoroId();
auto *AllocInst = SwitchId->getCoroAlloc();
coro::replaceCoroFree(SwitchId, /*Elide=*/AllocInst != nullptr);
if (AllocInst) {
IRBuilder<> Builder(AllocInst);
Expand Down Expand Up @@ -1687,7 +1686,7 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
auto &Context = F.getContext();
auto *Int8PtrTy = PointerType::getUnqual(Context);

auto *Id = cast<CoroIdAsyncInst>(Shape.CoroBegin->getId());
auto *Id = Shape.getAsyncCoroId();
IRBuilder<> Builder(Id);

auto *FramePtr = Id->getStorage();
Expand Down Expand Up @@ -1781,7 +1780,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
F.removeRetAttr(Attribute::NonNull);

// Allocate the frame.
auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
auto *Id = Shape.getRetconCoroId();
Value *RawFramePtr;
if (Shape.RetconLowering.IsFrameInlineInStorage) {
RawFramePtr = Id->getStorage();
Expand Down
Loading