Skip to content

[coro async] Add code to support dynamic alignment of over-aligned types in async frames #4797

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 47 additions & 2 deletions llvm/lib/Transforms/Coroutines/CoroFrame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,17 @@ struct FrameDataInfo {
FieldAlignMap.insert({V, Align});
}

/// Look up the dynamic-alignment value recorded for \p V.
///
/// The value must already have been registered through setDynamicAlign();
/// this is checked by an assertion only. A result of 0 is what
/// updateLayoutIndex() stores for fields that need no dynamic re-alignment.
uint64_t getDynamicAlign(Value *V) const {
  const auto Entry = FieldDynamicAlignMap.find(V);
  assert(Entry != FieldDynamicAlignMap.end());
  return Entry->second;
}

/// Record the dynamic-alignment value \p Align for \p V.
///
/// Each value may be registered at most once; a second registration trips the
/// assertion below.
void setDynamicAlign(Value *V, uint64_t Align) {
  assert(FieldDynamicAlignMap.find(V) == FieldDynamicAlignMap.end());
  FieldDynamicAlignMap.try_emplace(V, Align);
}

uint64_t getOffset(Value *V) const {
auto Iter = FieldOffsetMap.find(V);
assert(Iter != FieldOffsetMap.end());
Expand All @@ -382,6 +393,7 @@ struct FrameDataInfo {
// Map from values to their alignment on the frame. They would be set after
// the frame is built.
DenseMap<Value *, uint64_t> FieldAlignMap;
DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
// Map from values to their offset on the frame. They would be set after
// the frame is built.
DenseMap<Value *, uint64_t> FieldOffsetMap;
Expand Down Expand Up @@ -422,6 +434,7 @@ class FrameTypeBuilder {
FieldIDType LayoutFieldIndex;
Align Alignment;
Align TyAlignment;
uint64_t DynamicAlignBuffer;
};

const DataLayout &DL;
Expand Down Expand Up @@ -516,6 +529,18 @@ class FrameTypeBuilder {
FieldAlignment = TyAlignment;
}

// The field alignment may exceed the maximum frame alignment. In that case,
// request additional storage so that the pointer can be aligned dynamically
// at runtime.
uint64_t DynamicAlignBuffer = 0;
if (MaxFrameAlignment &&
(FieldAlignment.valueOrOne() > *MaxFrameAlignment)) {
DynamicAlignBuffer =
offsetToAlignment((*MaxFrameAlignment).value(), *FieldAlignment);
FieldAlignment = *MaxFrameAlignment;
FieldSize = FieldSize + DynamicAlignBuffer;
}

// Lay out header fields immediately.
uint64_t Offset;
if (IsHeader) {
Expand All @@ -527,7 +552,8 @@ class FrameTypeBuilder {
Offset = OptimizedStructLayoutField::FlexibleOffset;
}

Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment});
Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment,
DynamicAlignBuffer});
return Fields.size() - 1;
}

Expand Down Expand Up @@ -561,6 +587,11 @@ void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
auto Field = B.getLayoutField(getFieldIndex(I));
setFieldIndex(I, Field.LayoutFieldIndex);
setAlign(I, Field.Alignment.value());
uint64_t dynamicAlign =
Field.DynamicAlignBuffer
? Field.DynamicAlignBuffer + Field.Alignment.value()
: 0;
setDynamicAlign(I, dynamicAlign);
setOffset(I, Field.Offset);
};
LayoutIndexUpdateStarted = true;
Expand Down Expand Up @@ -759,6 +790,10 @@ void FrameTypeBuilder::finish(StructType *Ty) {
F.LayoutFieldIndex = FieldTypes.size();

FieldTypes.push_back(F.Ty);
if (F.DynamicAlignBuffer) {
FieldTypes.push_back(
ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
}
LastOffset = Offset + F.Size;
}

Expand Down Expand Up @@ -1554,7 +1589,17 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

auto GEP = cast<GetElementPtrInst>(
Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
if (isa<AllocaInst>(Orig)) {
if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
if (FrameData.getDynamicAlign(Orig) != 0) {
assert(FrameData.getDynamicAlign(Orig) == AI->getAlignment());
auto *M = AI->getModule();
auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
auto *AlignMask = ConstantInt::get(IntPtrTy, AI->getAlignment() - 1);
PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
return Builder.CreateIntToPtr(PtrValue, AI->getType());
}
// If the type of GEP is not equal to the type of AllocaInst, it implies
// that the AllocaInst may be reused in the Frame slot of other
// AllocaInst. So We cast GEP to the AllocaInst here to re-use
Expand Down
102 changes: 102 additions & 0 deletions llvm/test/Transforms/Coroutines/coro-async-dyn-align.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
; RUN: opt < %s -enable-coroutines -O0 -S | FileCheck %s

target datalayout = "p:64:64:64"

%async.task = type { i64 }
%async.actor = type { i64 }
%async.fp = type <{ i32, i32 }>

%async.ctxt = type { i8*, void (i8*)* }

@my_other_async_function_fp = external global <{ i32, i32 }>
declare void @my_other_async_function(i8* %async.ctxt)

; Packed { i32, i32 } descriptor for @my_async_function, passed to
; llvm.coro.id.async below.
;   field 0: the 64-bit difference between the address of @my_async_function
;            and the address of this global's second field, truncated to i32.
;   field 1: the constant 32.
; NOTE(review): field 1 is presumably the initial async context size; confirm
; against the llvm.coro.id.async documentation.
@my_async_function_fp = constant <{ i32, i32 }>
<{ i32 trunc (
i64 sub (
i64 ptrtoint (void (i8*)* @my_async_function to i64),
i64 ptrtoint (i32* getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @my_async_function_fp, i32 0, i32 1) to i64)
)
to i32),
i32 32
}>

declare void @opaque(i64*)
declare i8* @llvm.coro.async.context.alloc(i8*, i8*)
declare void @llvm.coro.async.context.dealloc(i8*)
declare i8* @llvm.coro.async.resume()
declare token @llvm.coro.id.async(i32, i32, i32, i8*)
declare i8* @llvm.coro.begin(token, i8*)
declare i1 @llvm.coro.end.async(i8*, i1, ...)
declare i1 @llvm.coro.end(i8*, i1)
declare swiftcc void @asyncReturn(i8*)
declare swiftcc void @asyncSuspend(i8*)
declare {i8*} @llvm.coro.suspend.async(i32, i8*, i8*, ...)

; Trampoline handed to llvm.coro.suspend.async by @my_async_function:
; reinterprets %fnPtr as a void(i8*) function and tail-calls it with
; %async.ctxt as its only argument.
define swiftcc void @my_async_function.my_other_async_function_fp.apply(i8* %fnPtr, i8* %async.ctxt) {
  %target = bitcast i8* %fnPtr to void(i8*)*
  tail call swiftcc void %target(i8* %async.ctxt)
  ret void
}

; Context-projection helper handed to llvm.coro.suspend.async by
; @my_async_function: treats %ctxt as the address of an i8* slot and returns
; the pointer stored there.
define i8* @__swift_async_resume_project_context(i8* %ctxt) {
entry:
  %slot = bitcast i8* %ctxt to i8**
  %projected = load i8*, i8** %slot, align 8
  ret i8* %projected
}


; CHECK: %my_async_function.Frame = type { i64, [48 x i8], i64, i64, [16 x i8], i8*, i64, i8* }
; CHECK: define swiftcc void @my_async_function
; CHECK: [[T0:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* %FramePtr, i32 0, i32 3
; CHECK: [[T1:%.*]] = ptrtoint i64* [[T0]] to i64
; CHECK: [[T2:%.*]] = add i64 [[T1]], 31
; CHECK: [[T3:%.*]] = and i64 [[T2]], -32
; CHECK: [[T4:%.*]] = inttoptr i64 [[T3]] to i64*
; CHECK: [[T5:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* %FramePtr, i32 0, i32 0
; CHECK: [[T6:%.*]] = ptrtoint i64* [[T5]] to i64
; CHECK: [[T7:%.*]] = add i64 [[T6]], 63
; CHECK: [[T8:%.*]] = and i64 [[T7]], -64
; CHECK: [[T9:%.*]] = inttoptr i64 [[T8]] to i64*
; CHECK: store i64 2, i64* [[T4]]
; CHECK: store i64 3, i64* [[T9]]

; Async coroutine under test. It creates four i64 stack slots with alignments
; of 8, 16, 32 and 64 bytes, writes each one before the suspend point and
; passes each one to @opaque after it. The FileCheck expectations for this
; function show the 32- and 64-byte slots being re-aligned with an
; add-then-mask sequence on the frame pointer.
define swiftcc void @my_async_function(i8* swiftasync %async.ctxt) "coroutine.presplit"="1" {
entry:
; Stack slots with increasing alignment.
%tmp = alloca i64, align 8
%tmp2 = alloca i64, align 16
%tmp3 = alloca i64, align 32
%tmp4 = alloca i64, align 64

; NOTE(review): the i32 16 argument is presumably the context alignment that
; the 32- and 64-byte slots exceed; confirm against the llvm.coro.id.async
; documentation.
%id = call token @llvm.coro.id.async(i32 32, i32 16, i32 0,
i8* bitcast (<{i32, i32}>* @my_async_function_fp to i8*))
%hdl = call i8* @llvm.coro.begin(token %id, i8* null)
; Give every slot a distinct value before suspending.
store i64 0, i64* %tmp
store i64 1, i64* %tmp2
store i64 2, i64* %tmp3
store i64 3, i64* %tmp4

; Allocate the callee context and store the resume function pointer into
; field 1 of its %async.ctxt layout.
%callee_context = call i8* @llvm.coro.async.context.alloc(i8* null, i8* null)
%callee_context.0 = bitcast i8* %callee_context to %async.ctxt*
%callee_context.return_to_caller.addr = getelementptr inbounds %async.ctxt, %async.ctxt* %callee_context.0, i32 0, i32 1
%return_to_caller.addr = bitcast void(i8*)** %callee_context.return_to_caller.addr to i8**
%resume.func_ptr = call i8* @llvm.coro.async.resume()
store i8* %resume.func_ptr, i8** %return_to_caller.addr

; Suspend point. NOTE(review): %callee and %callee_context are presumably
; forwarded to the .apply helper as its (fnPtr, async.ctxt) arguments; confirm
; against the llvm.coro.suspend.async documentation.
%callee = bitcast void(i8*)* @asyncSuspend to i8*
%resume_proj_fun = bitcast i8*(i8*)* @__swift_async_resume_project_context to i8*
%res = call {i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 0,
i8* %resume.func_ptr,
i8* %resume_proj_fun,
void (i8*, i8*)* @my_async_function.my_other_async_function_fp.apply,
i8* %callee, i8* %callee_context)
; Use every slot after the suspend point.
call void @opaque(i64* %tmp)
call void @opaque(i64* %tmp2)
call void @opaque(i64* %tmp3)
call void @opaque(i64* %tmp4)
call void @llvm.coro.async.context.dealloc(i8* %callee_context)
tail call swiftcc void @asyncReturn(i8* %async.ctxt)
call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %hdl, i1 0)
unreachable
}