Skip to content

Commit 2937f8d

Browse files
committed
[coro async] Cap the alignment of spilled values (vs. allocas) at the max frame alignment
Before this patch we would normally use the ABI alignment which can be to high for the context alginment. For spilled values we don't need ABI alignment, since the frame entry's address is not escaped. rdar://79664965 Differential Revision: https://reviews.llvm.org/D105288
1 parent 366805e commit 2937f8d

File tree

2 files changed

+41
-13
lines changed

2 files changed

+41
-13
lines changed

llvm/lib/Transforms/Coroutines/CoroFrame.cpp

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -429,12 +429,15 @@ class FrameTypeBuilder {
429429
Align StructAlign;
430430
bool IsFinished = false;
431431

432+
Optional<Align> MaxFrameAlignment;
433+
432434
SmallVector<Field, 8> Fields;
433435
DenseMap<Value*, unsigned> FieldIndexByKey;
434436

435437
public:
436-
FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
437-
: DL(DL), Context(Context) {}
438+
FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL,
439+
Optional<Align> MaxFrameAlignment)
440+
: DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
438441

439442
/// Add a field to this structure for the storage of an `alloca`
440443
/// instruction.
@@ -485,7 +488,8 @@ class FrameTypeBuilder {
485488

486489
/// Add a field to this structure.
487490
LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment,
488-
bool IsHeader = false) {
491+
bool IsHeader = false,
492+
bool IsSpillOfValue = false) {
489493
assert(!IsFinished && "adding fields to a finished builder");
490494
assert(Ty && "must provide a type for a field");
491495

@@ -500,8 +504,16 @@ class FrameTypeBuilder {
500504

501505
// The field alignment might not be the type alignment, but we need
502506
// to remember the type alignment anyway to build the type.
503-
Align TyAlignment = DL.getABITypeAlign(Ty);
504-
if (!FieldAlignment) FieldAlignment = TyAlignment;
507+
// If we are spilling values we don't need to worry about ABI alignment
508+
// concerns.
509+
auto ABIAlign = DL.getABITypeAlign(Ty);
510+
Align TyAlignment =
511+
(IsSpillOfValue && MaxFrameAlignment)
512+
? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign)
513+
: ABIAlign;
514+
if (!FieldAlignment) {
515+
FieldAlignment = TyAlignment;
516+
}
505517

506518
// Lay out header fields immediately.
507519
uint64_t Offset;
@@ -1089,7 +1101,11 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
10891101
return StructType::create(C, Name);
10901102
}();
10911103

1092-
FrameTypeBuilder B(C, DL);
1104+
// We will use this value to cap the alignment of spilled values.
1105+
Optional<Align> MaxFrameAlignment;
1106+
if (Shape.ABI == coro::ABI::Async)
1107+
MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
1108+
FrameTypeBuilder B(C, DL, MaxFrameAlignment);
10931109

10941110
AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
10951111
Optional<FieldIDType> SwitchIndexFieldId;
@@ -1142,7 +1158,8 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
11421158
if (const Argument *A = dyn_cast<Argument>(S.first))
11431159
if (A->hasByValAttr())
11441160
FieldType = FieldType->getPointerElementType();
1145-
FieldIDType Id = B.addField(FieldType, None);
1161+
FieldIDType Id =
1162+
B.addField(FieldType, None, false /*header*/, true /*IsSpillOfValue*/);
11461163
FrameData.setFieldIndex(S.first, Id);
11471164
}
11481165

@@ -1545,6 +1562,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
15451562

15461563
for (auto const &E : FrameData.Spills) {
15471564
Value *Def = E.first;
1565+
auto SpillAlignment = Align(FrameData.getAlign(Def));
15481566
// Create a store instruction storing the value into the
15491567
// coroutine frame.
15501568
Instruction *InsertPt = nullptr;
@@ -1601,9 +1619,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
16011619
// instead of the pointer itself.
16021620
auto *Value =
16031621
Builder.CreateLoad(Def->getType()->getPointerElementType(), Def);
1604-
Builder.CreateStore(Value, G);
1622+
Builder.CreateAlignedStore(Value, G, SpillAlignment);
16051623
} else {
1606-
Builder.CreateStore(Def, G);
1624+
Builder.CreateAlignedStore(Def, G, SpillAlignment);
16071625
}
16081626

16091627
BasicBlock *CurrentBlock = nullptr;
@@ -1621,9 +1639,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
16211639
if (NeedToCopyArgPtrValue)
16221640
CurrentReload = GEP;
16231641
else
1624-
CurrentReload = Builder.CreateLoad(
1642+
CurrentReload = Builder.CreateAlignedLoad(
16251643
FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
1626-
E.first->getName() + Twine(".reload"));
1644+
SpillAlignment, E.first->getName() + Twine(".reload"));
16271645

16281646
TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
16291647
for (DbgDeclareInst *DDI : DIs) {

llvm/test/Transforms/Coroutines/coro-async.ll

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; RUN: opt < %s -enable-coroutines -passes='default<O2>' -S | FileCheck --check-prefixes=CHECK %s
2-
; RUN: opt < %s -enable-coroutines -O0 -S
2+
; RUN: opt < %s -enable-coroutines -O0 -S | FileCheck --check-prefixes=CHECK-O0 %s
33
target datalayout = "p:64:64:64"
44

55
%async.task = type { i64 }
@@ -64,6 +64,7 @@ entry:
6464
define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) !dbg !1 {
6565
entry:
6666
%tmp = alloca { i64, i64 }, align 8
67+
%vector = alloca <4 x double>, align 16
6768
%proj.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 0
6869
%proj.2 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 1
6970

@@ -95,6 +96,7 @@ entry:
9596
store i8* %async.ctxt, i8** %callee_context.caller_context.addr
9697
%resume_proj_fun = bitcast i8*(i8*)* @__swift_async_resume_project_context to i8*
9798
%callee = bitcast void(i8*, %async.task*, %async.actor*)* @asyncSuspend to i8*
99+
%vector_spill = load <4 x double>, <4 x double>* %vector, align 16
98100
%res = call {i8*, i8*, i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 0,
99101
i8* %resume.func_ptr,
100102
i8* %resume_proj_fun,
@@ -108,7 +110,7 @@ entry:
108110
call void @some_user(i64 %val)
109111
%val.2 = load i64, i64* %proj.2
110112
call void @some_user(i64 %val.2)
111-
113+
store <4 x double> %vector_spill, <4 x double>* %vector, align 16
112114
tail call swiftcc void @asyncReturn(i8* %async.ctxt, %async.task* %task.2, %async.actor* %actor)
113115
call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %hdl, i1 0)
114116
unreachable
@@ -126,6 +128,7 @@ define void @my_async_function_pa(i8* %ctxt, %async.task* %task, %async.actor* %
126128
; CHECK: @my_async_function2_fp = constant <{ i32, i32 }> <{ {{.*}}, i32 176 }
127129

128130
; CHECK-LABEL: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor)
131+
; CHECK-O0-LABEL: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor)
129132
; CHECK-SAME: !dbg ![[SP1:[0-9]+]] {
130133
; CHECK: coro.return:
131134
; CHECK: [[FRAMEPTR:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 128
@@ -151,16 +154,23 @@ define void @my_async_function_pa(i8* %ctxt, %async.task* %task, %async.actor* %
151154
; CHECK: store i8* bitcast (void (i8*, i8*, i8*)* @my_async_functionTQ0_ to i8*), i8** [[RETURN_TO_CALLER_ADDR]]
152155
; CHECK: [[CALLER_CONTEXT_ADDR:%.*]] = bitcast i8* [[CALLEE_CTXT]] to i8**
153156
; CHECK: store i8* %async.ctxt, i8** [[CALLER_CONTEXT_ADDR]]
157+
; Make sure the spill is underaligned to the max context alignment (16).
158+
; CHECK-O0: [[VECTOR_SPILL:%.*]] = load <4 x double>, <4 x double>* {{.*}}
159+
; CHECK-O0: [[VECTOR_SPILL_ADDR:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* {{.*}}, i32 0, i32 1
160+
; CHECK-O0: store <4 x double> [[VECTOR_SPILL]], <4 x double>* [[VECTOR_SPILL_ADDR]], align 16
154161
; CHECK: tail call swiftcc void @asyncSuspend(i8* [[CALLEE_CTXT]], %async.task* %task, %async.actor* %actor)
155162
; CHECK: ret void
156163
; CHECK: }
157164

158165
; CHECK-LABEL: define internal swiftcc void @my_async_functionTQ0_(i8* nocapture readonly swiftasync %0, i8* %1, i8* nocapture readnone %2)
166+
; CHECK-O0-LABEL: define internal swiftcc void @my_async_functionTQ0_(i8* swiftasync %0, i8* %1, i8* %2)
159167
; CHECK-SAME: !dbg ![[SP2:[0-9]+]] {
160168
; CHECK: entryresume.0:
161169
; CHECK: [[CALLER_CONTEXT_ADDR:%.*]] = bitcast i8* %0 to i8**
162170
; CHECK: [[CALLER_CONTEXT:%.*]] = load i8*, i8** [[CALLER_CONTEXT_ADDR]]
163171
; CHECK: [[FRAME_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 128
172+
; CHECK-O0: [[VECTOR_SPILL_ADDR:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* {{.*}}, i32 0, i32 1
173+
; CHECK-O0: load <4 x double>, <4 x double>* [[VECTOR_SPILL_ADDR]], align 16
164174
; CHECK: [[CALLEE_CTXT_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 160
165175
; CHECK: [[CAST1:%.*]] = bitcast i8* [[CALLEE_CTXT_SPILL_ADDR]] to i8**
166176
; CHECK: [[CALLEE_CTXT_RELOAD:%.*]] = load i8*, i8** [[CAST1]]

0 commit comments

Comments
 (0)