Skip to content

Commit 08da779

Browse files
authored
Merge pull request #4800 from aschwaighofer/coro_async_frame_dynamic_align_5.7
[5.7] [coro async] Add code to support dynamic alignment of over-aligned types in async frames
2 parents c29ca02 + 42bf6cf commit 08da779

File tree

2 files changed

+149
-2
lines changed

2 files changed

+149
-2
lines changed

llvm/lib/Transforms/Coroutines/CoroFrame.cpp

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,17 @@ struct FrameDataInfo {
357357
FieldAlignMap.insert({V, Align});
358358
}
359359

360+
// Return the dynamic-alignment value recorded for \p V via setDynamicAlign().
// 0 means the frame slot needs no runtime re-alignment; a non-zero value is
// the alignment the slot's address must be rounded up to at runtime (for a
// spilled alloca this equals the alloca's original over-alignment — see the
// assert against AI->getAlignment() in insertSpills).
uint64_t getDynamicAlign(Value *V) const {
361+
auto Iter = FieldDynamicAlignMap.find(V);
362+
// Callers may only query values that were previously registered.
assert(Iter != FieldDynamicAlignMap.end());
363+
return Iter->second;
364+
}
365+
366+
// Record the dynamic re-alignment requirement for \p V. Called once per
// value (from updateLayoutIndex, after frame layout); duplicates are a bug.
void setDynamicAlign(Value *V, uint64_t Align) {
367+
assert(FieldDynamicAlignMap.count(V) == 0);
368+
FieldDynamicAlignMap.insert({V, Align});
369+
}
370+
360371
uint64_t getOffset(Value *V) const {
361372
auto Iter = FieldOffsetMap.find(V);
362373
assert(Iter != FieldOffsetMap.end());
@@ -382,6 +393,7 @@ struct FrameDataInfo {
382393
// Map from values to their alignment on the frame. They would be set after
383394
// the frame is built.
384395
DenseMap<Value *, uint64_t> FieldAlignMap;
396+
DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
385397
// Map from values to their offset on the frame. They would be set after
386398
// the frame is built.
387399
DenseMap<Value *, uint64_t> FieldOffsetMap;
@@ -422,6 +434,7 @@ class FrameTypeBuilder {
422434
FieldIDType LayoutFieldIndex;
423435
Align Alignment;
424436
Align TyAlignment;
437+
uint64_t DynamicAlignBuffer;
425438
};
426439

427440
const DataLayout &DL;
@@ -516,6 +529,18 @@ class FrameTypeBuilder {
516529
FieldAlignment = TyAlignment;
517530
}
518531

532+
// The field alignment could be bigger than the max frame alignment; in that
533+
// case we request additional storage to be able to dynamically align the
534+
// pointer.
535+
uint64_t DynamicAlignBuffer = 0;
536+
if (MaxFrameAlignment &&
537+
(FieldAlignment.valueOrOne() > *MaxFrameAlignment)) {
538+
DynamicAlignBuffer =
539+
offsetToAlignment((*MaxFrameAlignment).value(), *FieldAlignment);
540+
FieldAlignment = *MaxFrameAlignment;
541+
FieldSize = FieldSize + DynamicAlignBuffer;
542+
}
543+
519544
// Lay out header fields immediately.
520545
uint64_t Offset;
521546
if (IsHeader) {
@@ -527,7 +552,8 @@ class FrameTypeBuilder {
527552
Offset = OptimizedStructLayoutField::FlexibleOffset;
528553
}
529554

530-
Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment});
555+
Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment,
556+
DynamicAlignBuffer});
531557
return Fields.size() - 1;
532558
}
533559

@@ -561,6 +587,11 @@ void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
561587
auto Field = B.getLayoutField(getFieldIndex(I));
562588
setFieldIndex(I, Field.LayoutFieldIndex);
563589
setAlign(I, Field.Alignment.value());
590+
uint64_t dynamicAlign =
591+
Field.DynamicAlignBuffer
592+
? Field.DynamicAlignBuffer + Field.Alignment.value()
593+
: 0;
594+
setDynamicAlign(I, dynamicAlign);
564595
setOffset(I, Field.Offset);
565596
};
566597
LayoutIndexUpdateStarted = true;
@@ -759,6 +790,10 @@ void FrameTypeBuilder::finish(StructType *Ty) {
759790
F.LayoutFieldIndex = FieldTypes.size();
760791

761792
FieldTypes.push_back(F.Ty);
793+
if (F.DynamicAlignBuffer) {
794+
FieldTypes.push_back(
795+
ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
796+
}
762797
LastOffset = Offset + F.Size;
763798
}
764799

@@ -1554,7 +1589,17 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
15541589

15551590
auto GEP = cast<GetElementPtrInst>(
15561591
Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
1557-
if (isa<AllocaInst>(Orig)) {
1592+
if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1593+
if (FrameData.getDynamicAlign(Orig) != 0) {
1594+
assert(FrameData.getDynamicAlign(Orig) == AI->getAlignment());
1595+
auto *M = AI->getModule();
1596+
auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
1597+
auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
1598+
auto *AlignMask = ConstantInt::get(IntPtrTy, AI->getAlignment() - 1);
1599+
PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
1600+
PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
1601+
return Builder.CreateIntToPtr(PtrValue, AI->getType());
1602+
}
15581603
// If the type of GEP is not equal to the type of AllocaInst, it implies
15591604
// that the AllocaInst may be reused in the Frame slot of other
15601605
// AllocaInst. So We cast GEP to the AllocaInst here to re-use
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
; RUN: opt < %s -enable-coroutines -O0 -S | FileCheck %s
2+
3+
target datalayout = "p:64:64:64"
4+
5+
%async.task = type { i64 }
6+
%async.actor = type { i64 }
7+
%async.fp = type <{ i32, i32 }>
8+
9+
%async.ctxt = type { i8*, void (i8*)* }
10+
11+
@my_other_async_function_fp = external global <{ i32, i32 }>
12+
declare void @my_other_async_function(i8* %async.ctxt)
13+
14+
@my_async_function_fp = constant <{ i32, i32 }>
15+
<{ i32 trunc (
16+
i64 sub (
17+
i64 ptrtoint (void (i8*)* @my_async_function to i64),
18+
i64 ptrtoint (i32* getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @my_async_function_fp, i32 0, i32 1) to i64)
19+
)
20+
to i32),
21+
i32 32
22+
}>
23+
24+
declare void @opaque(i64*)
25+
declare i8* @llvm.coro.async.context.alloc(i8*, i8*)
26+
declare void @llvm.coro.async.context.dealloc(i8*)
27+
declare i8* @llvm.coro.async.resume()
28+
declare token @llvm.coro.id.async(i32, i32, i32, i8*)
29+
declare i8* @llvm.coro.begin(token, i8*)
30+
declare i1 @llvm.coro.end.async(i8*, i1, ...)
31+
declare i1 @llvm.coro.end(i8*, i1)
32+
declare swiftcc void @asyncReturn(i8*)
33+
declare swiftcc void @asyncSuspend(i8*)
34+
declare {i8*} @llvm.coro.suspend.async(i32, i8*, i8*, ...)
35+
36+
; Dispatch thunk handed to llvm.coro.suspend.async (see @my_async_function
; below): casts the opaque function pointer back to its real signature and
; tail-calls it with the callee's async context.
define swiftcc void @my_async_function.my_other_async_function_fp.apply(i8* %fnPtr, i8* %async.ctxt) {
37+
%callee = bitcast i8* %fnPtr to void(i8*)*
38+
tail call swiftcc void %callee(i8* %async.ctxt)
39+
ret void
40+
}
41+
42+
; Resume-projection function passed to llvm.coro.suspend.async: recovers the
; caller's resume context by loading the first pointer-sized slot of the
; context handed back at resumption.
define i8* @__swift_async_resume_project_context(i8* %ctxt) {
43+
entry:
44+
%resume_ctxt_addr = bitcast i8* %ctxt to i8**
45+
%resume_ctxt = load i8*, i8** %resume_ctxt_addr, align 8
46+
ret i8* %resume_ctxt
47+
}
48+
49+
50+
; CHECK: %my_async_function.Frame = type { i64, [48 x i8], i64, i64, [16 x i8], i8*, i64, i8* }
51+
; CHECK: define swiftcc void @my_async_function
52+
; CHECK: [[T0:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* %FramePtr, i32 0, i32 3
53+
; CHECK: [[T1:%.*]] = ptrtoint i64* [[T0]] to i64
54+
; CHECK: [[T2:%.*]] = add i64 [[T1]], 31
55+
; CHECK: [[T3:%.*]] = and i64 [[T2]], -32
56+
; CHECK: [[T4:%.*]] = inttoptr i64 [[T3]] to i64*
57+
; CHECK: [[T5:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* %FramePtr, i32 0, i32 0
58+
; CHECK: [[T6:%.*]] = ptrtoint i64* [[T5]] to i64
59+
; CHECK: [[T7:%.*]] = add i64 [[T6]], 63
60+
; CHECK: [[T8:%.*]] = and i64 [[T7]], -64
61+
; CHECK: [[T9:%.*]] = inttoptr i64 [[T8]] to i64*
62+
; CHECK: store i64 2, i64* [[T4]]
63+
; CHECK: store i64 3, i64* [[T9]]
64+
65+
; Test coroutine with four allocas of increasing alignment (8/16/32/64).
; %tmp3 and %tmp4 are over-aligned relative to the async frame (presumably the
; second coro.id.async argument, 16, is the max frame alignment — confirm
; against the coro intrinsics docs) and are live across the suspend, so they
; must be spilled with padding and re-aligned dynamically; the CHECK lines
; above match the add/and re-alignment sequences and the padded frame type.
define swiftcc void @my_async_function(i8* swiftasync %async.ctxt) "coroutine.presplit"="1" {
66+
entry:
67+
%tmp = alloca i64, align 8
68+
%tmp2 = alloca i64, align 16
69+
%tmp3 = alloca i64, align 32
70+
%tmp4 = alloca i64, align 64
71+
72+
%id = call token @llvm.coro.id.async(i32 32, i32 16, i32 0,
73+
i8* bitcast (<{i32, i32}>* @my_async_function_fp to i8*))
74+
%hdl = call i8* @llvm.coro.begin(token %id, i8* null)
75+
; Distinct values (0..3) let the CHECK stores identify each re-aligned slot.
store i64 0, i64* %tmp
76+
store i64 1, i64* %tmp2
77+
store i64 2, i64* %tmp3
78+
store i64 3, i64* %tmp4
79+
80+
; Allocate the callee's async context and store the resume function pointer
; into its return-to-caller slot (field 1 of %async.ctxt).
%callee_context = call i8* @llvm.coro.async.context.alloc(i8* null, i8* null)
81+
%callee_context.0 = bitcast i8* %callee_context to %async.ctxt*
82+
%callee_context.return_to_caller.addr = getelementptr inbounds %async.ctxt, %async.ctxt* %callee_context.0, i32 0, i32 1
83+
%return_to_caller.addr = bitcast void(i8*)** %callee_context.return_to_caller.addr to i8**
84+
%resume.func_ptr = call i8* @llvm.coro.async.resume()
85+
store i8* %resume.func_ptr, i8** %return_to_caller.addr
86+
87+
; Suspend point: everything used below must survive in the coroutine frame.
%callee = bitcast void(i8*)* @asyncSuspend to i8*
88+
%resume_proj_fun = bitcast i8*(i8*)* @__swift_async_resume_project_context to i8*
89+
%res = call {i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 0,
90+
i8* %resume.func_ptr,
91+
i8* %resume_proj_fun,
92+
void (i8*, i8*)* @my_async_function.my_other_async_function_fp.apply,
93+
i8* %callee, i8* %callee_context)
94+
; Uses after the suspend keep all four allocas live across it, forcing spills.
call void @opaque(i64* %tmp)
95+
call void @opaque(i64* %tmp2)
96+
call void @opaque(i64* %tmp3)
97+
call void @opaque(i64* %tmp4)
98+
call void @llvm.coro.async.context.dealloc(i8* %callee_context)
99+
tail call swiftcc void @asyncReturn(i8* %async.ctxt)
100+
call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %hdl, i1 0)
101+
unreachable
102+
}

0 commit comments

Comments
 (0)