Skip to content

Disable optimizations based on lifetime intrinsics #3336

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions llvm/lib/Transforms/Coroutines/CoroFrame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1240,8 +1240,10 @@ namespace {
struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
using Base = PtrUseVisitor<AllocaUseVisitor>;
AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
const CoroBeginInst &CB, const SuspendCrossingInfo &Checker)
: PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {}
const CoroBeginInst &CB, const SuspendCrossingInfo &Checker,
bool ShouldUseLifetimeStartInfo)
: PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker),
ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {}

void visit(Instruction &I) {
Users.insert(&I);
Expand Down Expand Up @@ -1389,6 +1391,7 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
SmallPtrSet<Instruction *, 4> Users{};
SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
bool MayWriteBeforeCoroBegin{false};
bool ShouldUseLifetimeStartInfo{true};

mutable llvm::Optional<bool> ShouldLiveOnFrame{};

Expand All @@ -1397,7 +1400,7 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
// more precise. We look at every pair of lifetime.start intrinsic and
// every basic block that uses the pointer to see if they cross suspension
// points. The uses cover both direct uses as well as indirect uses.
if (!LifetimeStarts.empty()) {
if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) {
for (auto *I : Users)
for (auto *S : LifetimeStarts)
if (Checker.isDefinitionAcrossSuspend(*S, I))
Expand Down Expand Up @@ -2489,8 +2492,15 @@ static void collectFrameAllocas(Function &F, coro::Shape &Shape,
continue;
}
DominatorTree DT(F);
// The code that uses lifetime.start intrinsic does not work for functions
// with loops without exit. Disable it on ABIs we know to generate such
// code.
bool ShouldUseLifetimeStartInfo =
(Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
Shape.ABI != coro::ABI::RetconOnce);
AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT,
*Shape.CoroBegin, Checker};
*Shape.CoroBegin, Checker,
ShouldUseLifetimeStartInfo};
Visitor.visitPtr(*AI);
if (!Visitor.getShouldLiveOnFrame())
continue;
Expand Down Expand Up @@ -2679,7 +2689,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
}
}

sinkLifetimeStartMarkers(F, Shape, Checker);
if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
Shape.ABI != coro::ABI::RetconOnce)
sinkLifetimeStartMarkers(F, Shape, Checker);

if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
collectFrameAllocas(F, Shape, Checker, FrameData.Allocas);
LLVM_DEBUG(dumpAllocas(FrameData.Allocas));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
; RUN: opt < %s -enable-coroutines -passes='default<O0>' -S | FileCheck --check-prefixes=CHECK %s
; RUN: opt < %s -enable-coroutines -O0 -S | FileCheck --check-prefixes=CHECK %s

target datalayout = "p:64:64:64"

%async.task = type { i64 }
%async.actor = type { i64 }
%async.fp = type <{ i32, i32 }>

%async.ctxt = type { i8*, void (i8*, %async.task*, %async.actor*)* }

; The async callee.
@my_other_async_function_fp = external global <{ i32, i32 }>
declare void @my_other_async_function(i8* %async.ctxt)

@my_async_function_fp = constant <{ i32, i32 }>
<{ i32 trunc ( ; Relative pointer to async function
i64 sub (
i64 ptrtoint (void (i8*)* @my_async_function to i64),
i64 ptrtoint (i32* getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @my_async_function_fp, i32 0, i32 1) to i64)
)
to i32),
i32 128 ; Initial async context size without space for frame
}>

define swiftcc void @my_other_async_function_fp.apply(i8* %fnPtr, i8* %async.ctxt) {
%callee = bitcast i8* %fnPtr to void(i8*)*
tail call swiftcc void %callee(i8* %async.ctxt)
ret void
}

declare void @escape(i64*)
declare void @store_resume(i8*)
define i8* @resume_context_projection(i8* %ctxt) {
entry:
%resume_ctxt_addr = bitcast i8* %ctxt to i8**
%resume_ctxt = load i8*, i8** %resume_ctxt_addr, align 8
ret i8* %resume_ctxt
}

; The address of alloca escapes but the analysis based on lifetimes fails to see
; that it can't localize this alloca.
; CHECK: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt) {
; CHECK: entry:
; CHECK-NOT: ret
; CHECK-NOT: [[ESCAPED_ADDR:%.*]] = alloca i64, align 8
; CHECK: ret
define swiftcc void @my_async_function(i8* swiftasync %async.ctxt) {
entry:
%escaped_addr = alloca i64

%id = call token @llvm.coro.id.async(i32 128, i32 16, i32 0,
i8* bitcast (<{i32, i32}>* @my_async_function_fp to i8*))
%hdl = call i8* @llvm.coro.begin(token %id, i8* null)
%ltb = bitcast i64* %escaped_addr to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* %ltb)
call void @escape(i64* %escaped_addr)
br label %callblock


callblock:

%callee_context = call i8* @context_alloc()

%resume.func_ptr = call i8* @llvm.coro.async.resume()
call void @store_resume(i8* %resume.func_ptr)
%resume_proj_fun = bitcast i8*(i8*)* @resume_context_projection to i8*
%callee = bitcast void(i8*)* @asyncSuspend to i8*
%res = call {i8*, i8*, i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 0,
i8* %resume.func_ptr,
i8* %resume_proj_fun,
void (i8*, i8*)* @my_other_async_function_fp.apply,
i8* %callee, i8* %callee_context)
br label %callblock
}

declare { i8*, i8*, i8*, i8* } @llvm.coro.suspend.async.sl_p0i8p0i8p0i8p0i8s(i32, i8*, i8*, ...)
declare i8* @llvm.coro.prepare.async(i8*)
declare token @llvm.coro.id.async(i32, i32, i32, i8*)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end.async(i8*, i1, ...)
declare i1 @llvm.coro.end(i8*, i1)
declare {i8*, i8*, i8*} @llvm.coro.suspend.async(i32, i8*, i8*, ...)
declare i8* @context_alloc()
declare void @llvm.coro.async.context.dealloc(i8*)
declare swiftcc void @asyncSuspend(i8*)
declare i8* @llvm.coro.async.resume()
declare void @llvm.coro.async.size.replace(i8*, i8*)
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #0
attributes #0 = { argmemonly nofree nosync nounwind willreturn }
115 changes: 115 additions & 0 deletions llvm/test/Transforms/Coroutines/coro-async-addr-lifetime-start-bug.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
; RUN: opt < %s -enable-coroutines -passes='default<O0>' -S | FileCheck --check-prefixes=CHECK %s
; RUN: opt < %s -enable-coroutines -O0 -S | FileCheck --check-prefixes=CHECK %s

target datalayout = "p:64:64:64"

%async.task = type { i64 }
%async.actor = type { i64 }
%async.fp = type <{ i32, i32 }>

%async.ctxt = type { i8*, void (i8*, %async.task*, %async.actor*)* }

; The async callee.
@my_other_async_function_fp = external global <{ i32, i32 }>
declare void @my_other_async_function(i8* %async.ctxt)

@my_async_function_fp = constant <{ i32, i32 }>
<{ i32 trunc ( ; Relative pointer to async function
i64 sub (
i64 ptrtoint (void (i8*)* @my_async_function to i64),
i64 ptrtoint (i32* getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @my_async_function_fp, i32 0, i32 1) to i64)
)
to i32),
i32 128 ; Initial async context size without space for frame
}>

define swiftcc void @my_other_async_function_fp.apply(i8* %fnPtr, i8* %async.ctxt) {
%callee = bitcast i8* %fnPtr to void(i8*)*
tail call swiftcc void %callee(i8* %async.ctxt)
ret void
}

declare void @escape(i64*)
declare void @store_resume(i8*)
declare i1 @exitLoop()
define i8* @resume_context_projection(i8* %ctxt) {
entry:
%resume_ctxt_addr = bitcast i8* %ctxt to i8**
%resume_ctxt = load i8*, i8** %resume_ctxt_addr, align 8
ret i8* %resume_ctxt
}

define swiftcc void @my_async_function(i8* swiftasync %async.ctxt) {
entry:
%escaped_addr = alloca i64

%id = call token @llvm.coro.id.async(i32 128, i32 16, i32 0,
i8* bitcast (<{i32, i32}>* @my_async_function_fp to i8*))
%hdl = call i8* @llvm.coro.begin(token %id, i8* null)
%ltb = bitcast i64* %escaped_addr to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* %ltb)
br label %callblock


callblock:

%callee_context = call i8* @context_alloc()

%resume.func_ptr = call i8* @llvm.coro.async.resume()
call void @store_resume(i8* %resume.func_ptr)
%resume_proj_fun = bitcast i8*(i8*)* @resume_context_projection to i8*
%callee = bitcast void(i8*)* @asyncSuspend to i8*
%res = call {i8*, i8*, i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 0,
i8* %resume.func_ptr,
i8* %resume_proj_fun,
void (i8*, i8*)* @my_other_async_function_fp.apply,
i8* %callee, i8* %callee_context)
call void @escape(i64* %escaped_addr)
%exitCond = call i1 @exitLoop()

;; We used to move the lifetime.start intrinsic here =>
;; This exposes two bugs:
; 1.) The code should use the basic block start not end as insertion point
; More problematically:
; 2.) The code marks the stack object as not alive for part of the loop.

br i1 %exitCond, label %loop_exit, label %loop
%res2 = call {i8*, i8*, i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 0,
i8* %resume.func_ptr,
i8* %resume_proj_fun,
void (i8*, i8*)* @my_other_async_function_fp.apply,
i8* %callee, i8* %callee_context)

%exitCond2 = call i1 @exitLoop()
br i1 %exitCond2, label %loop_exit, label %loop

loop:
br label %callblock

loop_exit:
call void @llvm.lifetime.end.p0i8(i64 4, i8* %ltb)
call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %hdl, i1 false)
unreachable
}

; CHECK: define {{.*}} void @my_async_function.resume.0(
; CHECK-NOT: call void @llvm.lifetime.start.p0i8(i64 4, i8* %3)
; CHECK: br i1 %exitCond, label %loop_exit, label %loop
; CHECK: lifetime.end
; CHECK: }

declare { i8*, i8*, i8*, i8* } @llvm.coro.suspend.async.sl_p0i8p0i8p0i8p0i8s(i32, i8*, i8*, ...)
declare i8* @llvm.coro.prepare.async(i8*)
declare token @llvm.coro.id.async(i32, i32, i32, i8*)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end.async(i8*, i1, ...)
declare i1 @llvm.coro.end(i8*, i1)
declare {i8*, i8*, i8*} @llvm.coro.suspend.async(i32, i8*, i8*, ...)
declare i8* @context_alloc()
declare void @llvm.coro.async.context.dealloc(i8*)
declare swiftcc void @asyncSuspend(i8*)
declare i8* @llvm.coro.async.resume()
declare void @llvm.coro.async.size.replace(i8*, i8*)
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #0
attributes #0 = { argmemonly nofree nosync nounwind willreturn }