Skip to content

Commit c575f28

Browse files
committed
[coro async] Cap the alignment of spilled values (vs. allocas) at the max frame alignment
Before this patch we would normally use the ABI alignment which can be to high for the context alginment. For spilled values we don't need ABI alignment, since the frame entry's address is not escaped. rdar://79664965 Differential Revision: https://reviews.llvm.org/D105288
1 parent 9e29835 commit c575f28

File tree

2 files changed

+60
-12
lines changed

2 files changed

+60
-12
lines changed

llvm/lib/Transforms/Coroutines/CoroFrame.cpp

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,16 @@ struct FrameDataInfo {
335335
"Cannot set the index for the same field twice.");
336336
FieldIndexMap[V] = Index;
337337
}
338+
uint64_t getAlign(Value *V) const {
339+
auto Iter = FieldAlignMap.find(V);
340+
assert(Iter != FieldAlignMap.end());
341+
return Iter->second;
342+
}
343+
344+
void setAlign(Value *V, uint64_t Align) {
345+
assert(FieldAlignMap.count(V) == 0);
346+
FieldAlignMap.insert({V, Align});
347+
}
338348

339349
// Remap the index of every field in the frame, using the final layout index.
340350
void updateLayoutIndex(FrameTypeBuilder &B);
@@ -347,6 +357,9 @@ struct FrameDataInfo {
347357
// with their original insertion field index. After the frame is built, their
348358
// indexes will be updated into the final layout index.
349359
DenseMap<Value *, uint32_t> FieldIndexMap;
360+
// Map from values to their alignment on the frame. They would be set after
361+
// the frame is built.
362+
DenseMap<Value *, uint64_t> FieldAlignMap;
350363
};
351364
} // namespace
352365

@@ -392,12 +405,15 @@ class FrameTypeBuilder {
392405
Align StructAlign;
393406
bool IsFinished = false;
394407

408+
Optional<Align> MaxFrameAlignment;
409+
395410
SmallVector<Field, 8> Fields;
396411
DenseMap<Value*, unsigned> FieldIndexByKey;
397412

398413
public:
399-
FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
400-
: DL(DL), Context(Context) {}
414+
FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL,
415+
Optional<Align> MaxFrameAlignment)
416+
: DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
401417

402418
/// Add a field to this structure for the storage of an `alloca`
403419
/// instruction.
@@ -448,7 +464,8 @@ class FrameTypeBuilder {
448464

449465
/// Add a field to this structure.
450466
LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment,
451-
bool IsHeader = false) {
467+
bool IsHeader = false,
468+
bool IsSpillOfValue = false) {
452469
assert(!IsFinished && "adding fields to a finished builder");
453470
assert(Ty && "must provide a type for a field");
454471

@@ -457,8 +474,16 @@ class FrameTypeBuilder {
457474

458475
// The field alignment might not be the type alignment, but we need
459476
// to remember the type alignment anyway to build the type.
460-
Align TyAlignment = DL.getABITypeAlign(Ty);
461-
if (!FieldAlignment) FieldAlignment = TyAlignment;
477+
// If we are spilling values we don't need to worry about ABI alignment
478+
// concerns.
479+
auto ABIAlign = DL.getABITypeAlign(Ty);
480+
Align TyAlignment =
481+
(IsSpillOfValue && MaxFrameAlignment)
482+
? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign)
483+
: ABIAlign;
484+
if (!FieldAlignment) {
485+
FieldAlignment = TyAlignment;
486+
}
462487

463488
// Lay out header fields immediately.
464489
uint64_t Offset;
@@ -492,12 +517,18 @@ class FrameTypeBuilder {
492517
assert(IsFinished && "not yet finished!");
493518
return Fields[Id].LayoutFieldIndex;
494519
}
520+
Field getLayoutField(FieldIDType Id) const {
521+
assert(IsFinished && "not yet finished!");
522+
return Fields[Id];
523+
}
495524
};
496525
} // namespace
497526

498527
void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
499528
auto Updater = [&](Value *I) {
500-
setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I)));
529+
auto Field = B.getLayoutField(getFieldIndex(I));
530+
setFieldIndex(I, Field.LayoutFieldIndex);
531+
setAlign(I, Field.Alignment.value());
501532
};
502533
LayoutIndexUpdateStarted = true;
503534
for (auto &S : Spills)
@@ -722,7 +753,11 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
722753
return StructType::create(C, Name);
723754
}();
724755

725-
FrameTypeBuilder B(C, DL);
756+
// We will use this value to cap the alignment of spilled values.
757+
Optional<Align> MaxFrameAlignment;
758+
if (Shape.ABI == coro::ABI::Async)
759+
MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
760+
FrameTypeBuilder B(C, DL, MaxFrameAlignment);
726761

727762
AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
728763
Optional<FieldIDType> SwitchIndexFieldId;
@@ -760,7 +795,8 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
760795
B.addFieldForAllocas(F, FrameData, Shape);
761796
// Create an entry for every spilled value.
762797
for (auto &S : FrameData.Spills) {
763-
FieldIDType Id = B.addField(S.first->getType(), None);
798+
FieldIDType Id = B.addField(S.first->getType(), None, false /*header*/,
799+
true /*IsSpillOfValue*/);
764800
FrameData.setFieldIndex(S.first, Id);
765801
}
766802

@@ -1122,6 +1158,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
11221158

11231159
for (auto const &E : FrameData.Spills) {
11241160
Value *Def = E.first;
1161+
auto SpillAlignment = Align(FrameData.getAlign(Def));
11251162
// Create a store instruction storing the value into the
11261163
// coroutine frame.
11271164
Instruction *InsertPt = nullptr;
@@ -1169,7 +1206,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
11691206
Builder.SetInsertPoint(InsertPt);
11701207
auto *G = Builder.CreateConstInBoundsGEP2_32(
11711208
FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
1172-
Builder.CreateStore(Def, G);
1209+
Builder.CreateAlignedStore(Def, G, SpillAlignment);
11731210

11741211
BasicBlock *CurrentBlock = nullptr;
11751212
Value *CurrentReload = nullptr;
@@ -1183,9 +1220,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
11831220

11841221
auto *GEP = GetFramePointer(E.first);
11851222
GEP->setName(E.first->getName() + Twine(".reload.addr"));
1186-
CurrentReload = Builder.CreateLoad(
1223+
CurrentReload = Builder.CreateAlignedLoad(
11871224
FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
1188-
E.first->getName() + Twine(".reload"));
1225+
SpillAlignment, E.first->getName() + Twine(".reload"));
11891226

11901227
TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
11911228
for (DbgDeclareInst *DDI : DIs) {

llvm/test/Transforms/Coroutines/coro-async.ll

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck --check-prefixes=CHECK %s
22
; RUN: opt < %s -enable-coroutines -passes='default<O2>' -S | FileCheck --check-prefixes=CHECK %s
3+
; RUN: opt < %s -enable-coroutines -O0 -S | FileCheck --check-prefixes=CHECK-O0 %s
34

45
target datalayout = "p:64:64:64"
56

@@ -58,6 +59,7 @@ entry:
5859
define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) !dbg !1 {
5960
entry:
6061
%tmp = alloca { i64, i64 }, align 8
62+
%vector = alloca <4 x double>, align 16
6163
%proj.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 0
6264
%proj.2 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 1
6365

@@ -89,6 +91,7 @@ entry:
8991
store i8* %async.ctxt, i8** %callee_context.caller_context.addr
9092
%resume_proj_fun = bitcast i8*(i8*)* @resume_context_projection to i8*
9193
%callee = bitcast void(i8*, %async.task*, %async.actor*)* @asyncSuspend to i8*
94+
%vector_spill = load <4 x double>, <4 x double>* %vector, align 16
9295
%res = call {i8*, i8*, i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 0,
9396
i8* %resume.func_ptr,
9497
i8* %resume_proj_fun,
@@ -102,7 +105,7 @@ entry:
102105
call void @some_user(i64 %val)
103106
%val.2 = load i64, i64* %proj.2
104107
call void @some_user(i64 %val.2)
105-
108+
store <4 x double> %vector_spill, <4 x double>* %vector, align 16
106109
tail call swiftcc void @asyncReturn(i8* %async.ctxt, %async.task* %task.2, %async.actor* %actor)
107110
call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %hdl, i1 0)
108111
unreachable
@@ -120,6 +123,7 @@ define void @my_async_function_pa(i8* %ctxt, %async.task* %task, %async.actor* %
120123
; CHECK: @my_async_function2_fp = constant <{ i32, i32 }> <{ {{.*}}, i32 176 }
121124

122125
; CHECK-LABEL: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor)
126+
; CHECK-O0-LABEL: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor)
123127
; CHECK-SAME: !dbg ![[SP1:[0-9]+]] {
124128
; CHECK: entry:
125129
; CHECK: [[FRAMEPTR:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 128
@@ -145,16 +149,23 @@ define void @my_async_function_pa(i8* %ctxt, %async.task* %task, %async.actor* %
145149
; CHECK: store i8* bitcast (void (i8*, i8*, i8*)* @my_async_function.resume.0 to i8*), i8** [[RETURN_TO_CALLER_ADDR]]
146150
; CHECK: [[CALLER_CONTEXT_ADDR:%.*]] = bitcast i8* [[CALLEE_CTXT]] to i8**
147151
; CHECK: store i8* %async.ctxt, i8** [[CALLER_CONTEXT_ADDR]]
152+
; Make sure the spill is underaligned to the max context alignment (16).
153+
; CHECK-O0: [[VECTOR_SPILL:%.*]] = load <4 x double>, <4 x double>* {{.*}}
154+
; CHECK-O0: [[VECTOR_SPILL_ADDR:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* {{.*}}, i32 0, i32 1
155+
; CHECK-O0: store <4 x double> [[VECTOR_SPILL]], <4 x double>* [[VECTOR_SPILL_ADDR]], align 16
148156
; CHECK: tail call swiftcc void @asyncSuspend(i8* [[CALLEE_CTXT]], %async.task* %task, %async.actor* %actor)
149157
; CHECK: ret void
150158
; CHECK: }
151159

152160
; CHECK-LABEL: define internal swiftcc void @my_async_function.resume.0(i8* nocapture readonly swiftasync %0, i8* %1, i8* nocapture readnone %2)
161+
; CHECK-O0-LABEL: define internal swiftcc void @my_async_function.resume.0(i8* swiftasync %0, i8* %1, i8* %2)
153162
; CHECK-SAME: !dbg ![[SP2:[0-9]+]] {
154163
; CHECK: entryresume.0:
155164
; CHECK: [[CALLER_CONTEXT_ADDR:%.*]] = bitcast i8* %0 to i8**
156165
; CHECK: [[CALLER_CONTEXT:%.*]] = load i8*, i8** [[CALLER_CONTEXT_ADDR]]
157166
; CHECK: [[FRAME_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 128
167+
; CHECK-O0: [[VECTOR_SPILL_ADDR:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* {{.*}}, i32 0, i32 1
168+
; CHECK-O0: load <4 x double>, <4 x double>* [[VECTOR_SPILL_ADDR]], align 16
158169
; CHECK: [[CALLEE_CTXT_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 160
159170
; CHECK: [[CAST1:%.*]] = bitcast i8* [[CALLEE_CTXT_SPILL_ADDR]] to i8**
160171
; CHECK: [[CALLEE_CTXT_RELOAD:%.*]] = load i8*, i8** [[CAST1]]

0 commit comments

Comments
 (0)