Skip to content

[Coroutines] Inline the .noalloc ramp function marked coro_safe_elide #114004

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions llvm/include/llvm/Transforms/Coroutines/CoroAnnotationElide.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,18 @@
#ifndef LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H
#define LLVM_TRANSFORMS_COROUTINES_COROANNOTATIONELIDE_H

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/IR/PassManager.h"

namespace llvm {

class Function;

struct CoroAnnotationElidePass : PassInfoMixin<CoroAnnotationElidePass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
CoroAnnotationElidePass() {}

PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);

static bool isRequired() { return false; }
};
} // end namespace llvm
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -984,8 +984,7 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,

if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
MainCGPipeline.addPass(
createCGSCCToFunctionPassAdaptor(CoroAnnotationElidePass()));
MainCGPipeline.addPass(CoroAnnotationElidePass());
}

// Make sure we don't affect potential future NoRerun CGSCC adaptors.
Expand Down Expand Up @@ -1036,7 +1035,8 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
CoroSplitPass(Level != OptimizationLevel::O0)));
MPM.addPass(createModuleToFunctionPassAdaptor(CoroAnnotationElidePass()));
MPM.addPass(
createModuleToPostOrderCGSCCPassAdaptor(CoroAnnotationElidePass()));
}

return MPM;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Passes/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ CGSCC_PASS("attributor-light-cgscc", AttributorLightCGSCCPass())
CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
CGSCC_PASS("openmp-opt-cgscc", OpenMPOptCGSCCPass())
CGSCC_PASS("coro-annotation-elide", CoroAnnotationElidePass())
#undef CGSCC_PASS

#ifndef CGSCC_PASS_WITH_PARAMS
Expand Down Expand Up @@ -346,7 +347,6 @@ FUNCTION_PASS("complex-deinterleaving", ComplexDeinterleavingPass(TM))
FUNCTION_PASS("consthoist", ConstantHoistingPass())
FUNCTION_PASS("constraint-elimination", ConstraintEliminationPass())
FUNCTION_PASS("coro-elide", CoroElidePass())
FUNCTION_PASS("coro-annotation-elide", CoroAnnotationElidePass())
FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
FUNCTION_PASS("count-visits", CountVisitsPass())
FUNCTION_PASS("dce", DCEPass())
Expand Down
124 changes: 76 additions & 48 deletions llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@

#include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Analysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Cloning.h"

#include <cassert>

Expand Down Expand Up @@ -90,61 +93,86 @@ static void processCall(CallBase *CB, Function *Caller, Function *NewCallee,

NewCB->removeFnAttr(llvm::Attribute::CoroElideSafe);
CB->replaceAllUsesWith(NewCB);
CB->eraseFromParent();

InlineFunctionInfo IFI;
InlineResult IR = InlineFunction(*NewCB, IFI);
if (IR.isSuccess()) {
CB->eraseFromParent();
} else {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a test for the failed to inline case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added. Sorry for the delay.

NewCB->replaceAllUsesWith(CB);
NewCB->eraseFromParent();
}
}

PreservedAnalyses CoroAnnotationElidePass::run(Function &F,
FunctionAnalysisManager &FAM) {
PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
CGSCCAnalysisManager &AM,
LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
bool Changed = false;
CallGraphUpdater CGUpdater;
CGUpdater.initialize(CG, C, AM, UR);

Function *NewCallee =
F.getParent()->getFunction((F.getName() + ".noalloc").str());

if (!NewCallee)
return PreservedAnalyses::all();

auto FramePtrArgPosition = NewCallee->arg_size() - 1;
auto FrameSize = NewCallee->getParamDereferenceableBytes(FramePtrArgPosition);
auto FrameAlign = NewCallee->getParamAlign(FramePtrArgPosition).valueOrOne();
auto &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

SmallVector<CallBase *, 4> Users;
for (auto *U : F.users()) {
if (auto *CB = dyn_cast<CallBase>(U)) {
if (CB->getCalledFunction() == &F)
Users.push_back(CB);
}
}

auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

for (auto *CB : Users) {
auto *Caller = CB->getFunction();
if (!Caller)
for (LazyCallGraph::Node &N : C) {
Function *Callee = &N.getFunction();
Function *NewCallee = Callee->getParent()->getFunction(
(Callee->getName() + ".noalloc").str());
if (!NewCallee)
continue;

bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
if (IsCallerPresplitCoroutine && HasAttr) {
processCall(CB, Caller, NewCallee, FrameSize, FrameAlign);

ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller)
<< "'" << ore::NV("callee", F.getName()) << "' elided in '"
<< ore::NV("caller", Caller->getName()) << "'";
});

FAM.invalidate(*Caller, PreservedAnalyses::none());
Changed = true;
} else {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "CoroAnnotationElide",
Caller)
<< "'" << ore::NV("callee", F.getName()) << "' not elided in '"
<< ore::NV("caller", Caller->getName()) << "' (caller_presplit="
<< ore::NV("caller_presplit", IsCallerPresplitCoroutine)
<< ", elide_safe_attr=" << ore::NV("elide_safe_attr", HasAttr)
<< ")";
});
SmallVector<CallBase *, 4> Users;
for (auto *U : Callee->users()) {
if (auto *CB = dyn_cast<CallBase>(U)) {
if (CB->getCalledFunction() == Callee)
Users.push_back(CB);
}
}
auto FramePtrArgPosition = NewCallee->arg_size() - 1;
auto FrameSize =
NewCallee->getParamDereferenceableBytes(FramePtrArgPosition);
auto FrameAlign =
NewCallee->getParamAlign(FramePtrArgPosition).valueOrOne();

auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(*Callee);

for (auto *CB : Users) {
auto *Caller = CB->getFunction();
if (!Caller)
continue;

bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
if (IsCallerPresplitCoroutine && HasAttr) {
auto *CallerN = CG.lookup(*Caller);
auto *CallerC = CG.lookupSCC(*CallerN);
processCall(CB, Caller, NewCallee, FrameSize, FrameAlign);

ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' elided in '" << ore::NV("caller", Caller->getName())
<< "'";
});

FAM.invalidate(*Caller, PreservedAnalyses::none());
Changed = true;
updateCGAndAnalysisManagerForCGSCCPass(CG, *CallerC, *CallerN, AM, UR,
FAM);

} else {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "CoroAnnotationElide",
Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' not elided in '" << ore::NV("caller", Caller->getName())
<< "' (caller_presplit="
<< ore::NV("caller_presplit", IsCallerPresplitCoroutine)
<< ", elide_safe_attr=" << ore::NV("elide_safe_attr", HasAttr)
<< ")";
});
}
}
}

Expand Down
10 changes: 7 additions & 3 deletions llvm/test/Transforms/Coroutines/coro-transform-must-elide.ll
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,13 @@ define ptr @caller() #0 {
entry:
%task = call ptr @callee(i8 0) #1
ret ptr %task

; CHECK: %[[FRAME:.+]] = alloca [32 x i8], align 8
; CHECK-NEXT: %[[TASK:.+]] = call ptr @callee.noalloc(i8 0, ptr %[[FRAME]])
; CHECK: %[[TASK:.+]] = alloca %struct.Task, align 8
; CHECK-NEXT: %[[FRAME:.+]] = alloca [32 x i8], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr %[[TASK]])
; CHECK-NEXT: %[[ID:.+]] = call token @llvm.coro.id(i32 0, ptr null, ptr @callee, ptr @callee.resumers)
; CHECK-NEXT: %[[HDL:.+]] = call ptr @llvm.coro.begin(token %[[ID]], ptr null)
; CHECK-NEXT: store ptr %[[HDL]], ptr %[[TASK]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr %[[TASK]])
; CHECK-NEXT: ret ptr %[[TASK]]
}

Expand Down
32 changes: 32 additions & 0 deletions llvm/test/Transforms/Coroutines/gh114487-crash-in-cgscc.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; Verify that we don't crash when eliding coro_elide_safe callsites.
; RUN: opt < %s -passes='cgscc(function<>(simplifycfg<>),function-attrs,coro-split,coro-annotation-elide)' -S | FileCheck %s

; CHECK-LABEL: define void @foo()
define void @foo() presplitcoroutine personality ptr null {
entry:
%0 = call token @llvm.coro.save(ptr null)
br label %branch

branch:
; Check that we don't call bar at all.
; CHECK-NOT: call void @bar{{.*}}
call void @bar() coro_elide_safe
; CHECK: call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @bar.resumers)
ret void
}

; CHECK-LABEL: define void @bar()
define void @bar() presplitcoroutine personality ptr null {
entry:
%0 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%1 = call ptr @llvm.coro.begin(token %0, ptr null)
%2 = call token @llvm.coro.save(ptr null)
%3 = call i8 @llvm.coro.suspend(token none, i1 false)
ret void
}

declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) nounwind
declare ptr @llvm.coro.begin(token, ptr writeonly) nounwind
declare token @llvm.coro.save(ptr) nomerge nounwind
declare i8 @llvm.coro.suspend(token, i1) nounwind

33 changes: 33 additions & 0 deletions llvm/test/Transforms/Coroutines/gh114487-non-inlinable.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
; Verify that we don't crash when eliding coro_elide_safe callsites.
; RUN: opt < %s -passes='cgscc(function<>(simplifycfg<>),function-attrs,coro-annotation-elide)' -S | FileCheck %s

; CHECK-LABEL: define void @foo()
define void @foo() presplitcoroutine personality ptr null {
entry:
%0 = call token @llvm.coro.save(ptr null)
br label %branch

branch:
; Check that we still call bar() because we can't inline bar.noalloc()
; CHECK: call void @bar()
call void @bar() coro_elide_safe
ret void
}

; CHECK-LABEL: define void @bar()
define void @bar() personality ptr null {
entry:
%0 = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%1 = call ptr @llvm.coro.begin(token %0, ptr null)
%2 = call token @llvm.coro.save(ptr null)
%3 = call i8 @llvm.coro.suspend(token none, i1 false)
ret void
}

declare void @bar.noalloc(ptr)

declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) nounwind
declare ptr @llvm.coro.begin(token, ptr writeonly) nounwind
declare token @llvm.coro.save(ptr) nomerge nounwind
declare i8 @llvm.coro.suspend(token, i1) nounwind

Loading