Skip to content

Cap the alignment of spilled values (vs. allocas) at the max frame alignment #3071

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 48 additions & 11 deletions llvm/lib/Transforms/Coroutines/CoroFrame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,16 @@ struct FrameDataInfo {
"Cannot set the index for the same field twice.");
FieldIndexMap[V] = Index;
}
uint64_t getAlign(Value *V) const {
auto Iter = FieldAlignMap.find(V);
assert(Iter != FieldAlignMap.end());
return Iter->second;
}

void setAlign(Value *V, uint64_t Align) {
assert(FieldAlignMap.count(V) == 0);
FieldAlignMap.insert({V, Align});
}

// Remap the index of every field in the frame, using the final layout index.
void updateLayoutIndex(FrameTypeBuilder &B);
Expand All @@ -347,6 +357,9 @@ struct FrameDataInfo {
// with their original insertion field index. After the frame is built, their
// indexes will be updated into the final layout index.
DenseMap<Value *, uint32_t> FieldIndexMap;
// Map from values to their alignment on the frame. They would be set after
// the frame is built.
DenseMap<Value *, uint64_t> FieldAlignMap;
};
} // namespace

Expand Down Expand Up @@ -392,12 +405,15 @@ class FrameTypeBuilder {
Align StructAlign;
bool IsFinished = false;

Optional<Align> MaxFrameAlignment;

SmallVector<Field, 8> Fields;
DenseMap<Value*, unsigned> FieldIndexByKey;

public:
FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
: DL(DL), Context(Context) {}
FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL,
Optional<Align> MaxFrameAlignment)
: DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}

/// Add a field to this structure for the storage of an `alloca`
/// instruction.
Expand Down Expand Up @@ -448,7 +464,8 @@ class FrameTypeBuilder {

/// Add a field to this structure.
LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment,
bool IsHeader = false) {
bool IsHeader = false,
bool IsSpillOfValue = false) {
assert(!IsFinished && "adding fields to a finished builder");
assert(Ty && "must provide a type for a field");

Expand All @@ -457,8 +474,16 @@ class FrameTypeBuilder {

// The field alignment might not be the type alignment, but we need
// to remember the type alignment anyway to build the type.
Align TyAlignment = DL.getABITypeAlign(Ty);
if (!FieldAlignment) FieldAlignment = TyAlignment;
// If we are spilling values we don't need to worry about ABI alignment
// concerns.
auto ABIAlign = DL.getABITypeAlign(Ty);
Align TyAlignment =
(IsSpillOfValue && MaxFrameAlignment)
? (*MaxFrameAlignment < ABIAlign ? *MaxFrameAlignment : ABIAlign)
: ABIAlign;
if (!FieldAlignment) {
FieldAlignment = TyAlignment;
}

// Lay out header fields immediately.
uint64_t Offset;
Expand Down Expand Up @@ -492,12 +517,18 @@ class FrameTypeBuilder {
assert(IsFinished && "not yet finished!");
return Fields[Id].LayoutFieldIndex;
}
Field getLayoutField(FieldIDType Id) const {
assert(IsFinished && "not yet finished!");
return Fields[Id];
}
};
} // namespace

void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
auto Updater = [&](Value *I) {
setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I)));
auto Field = B.getLayoutField(getFieldIndex(I));
setFieldIndex(I, Field.LayoutFieldIndex);
setAlign(I, Field.Alignment.value());
};
LayoutIndexUpdateStarted = true;
for (auto &S : Spills)
Expand Down Expand Up @@ -722,7 +753,11 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
return StructType::create(C, Name);
}();

FrameTypeBuilder B(C, DL);
// We will use this value to cap the alignment of spilled values.
Optional<Align> MaxFrameAlignment;
if (Shape.ABI == coro::ABI::Async)
MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
FrameTypeBuilder B(C, DL, MaxFrameAlignment);

AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
Optional<FieldIDType> SwitchIndexFieldId;
Expand Down Expand Up @@ -760,7 +795,8 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
B.addFieldForAllocas(F, FrameData, Shape);
// Create an entry for every spilled value.
for (auto &S : FrameData.Spills) {
FieldIDType Id = B.addField(S.first->getType(), None);
FieldIDType Id = B.addField(S.first->getType(), None, false /*header*/,
true /*IsSpillOfValue*/);
FrameData.setFieldIndex(S.first, Id);
}

Expand Down Expand Up @@ -1122,6 +1158,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

for (auto const &E : FrameData.Spills) {
Value *Def = E.first;
auto SpillAlignment = Align(FrameData.getAlign(Def));
// Create a store instruction storing the value into the
// coroutine frame.
Instruction *InsertPt = nullptr;
Expand Down Expand Up @@ -1169,7 +1206,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
Builder.SetInsertPoint(InsertPt);
auto *G = Builder.CreateConstInBoundsGEP2_32(
FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
Builder.CreateStore(Def, G);
Builder.CreateAlignedStore(Def, G, SpillAlignment);

BasicBlock *CurrentBlock = nullptr;
Value *CurrentReload = nullptr;
Expand All @@ -1183,9 +1220,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,

auto *GEP = GetFramePointer(E.first);
GEP->setName(E.first->getName() + Twine(".reload.addr"));
CurrentReload = Builder.CreateLoad(
CurrentReload = Builder.CreateAlignedLoad(
FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
E.first->getName() + Twine(".reload"));
SpillAlignment, E.first->getName() + Twine(".reload"));

TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
for (DbgDeclareInst *DDI : DIs) {
Expand Down
13 changes: 12 additions & 1 deletion llvm/test/Transforms/Coroutines/coro-async.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; RUN: opt < %s -enable-coroutines -O2 -S | FileCheck --check-prefixes=CHECK %s
; RUN: opt < %s -enable-coroutines -passes='default<O2>' -S | FileCheck --check-prefixes=CHECK %s
; RUN: opt < %s -enable-coroutines -O0 -S | FileCheck --check-prefixes=CHECK-O0 %s

target datalayout = "p:64:64:64"

Expand Down Expand Up @@ -58,6 +59,7 @@ entry:
define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor) !dbg !1 {
entry:
%tmp = alloca { i64, i64 }, align 8
%vector = alloca <4 x double>, align 16
%proj.1 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 0
%proj.2 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %tmp, i64 0, i32 1

Expand Down Expand Up @@ -89,6 +91,7 @@ entry:
store i8* %async.ctxt, i8** %callee_context.caller_context.addr
%resume_proj_fun = bitcast i8*(i8*)* @resume_context_projection to i8*
%callee = bitcast void(i8*, %async.task*, %async.actor*)* @asyncSuspend to i8*
%vector_spill = load <4 x double>, <4 x double>* %vector, align 16
%res = call {i8*, i8*, i8*} (i32, i8*, i8*, ...) @llvm.coro.suspend.async(i32 0,
i8* %resume.func_ptr,
i8* %resume_proj_fun,
Expand All @@ -102,7 +105,7 @@ entry:
call void @some_user(i64 %val)
%val.2 = load i64, i64* %proj.2
call void @some_user(i64 %val.2)

store <4 x double> %vector_spill, <4 x double>* %vector, align 16
tail call swiftcc void @asyncReturn(i8* %async.ctxt, %async.task* %task.2, %async.actor* %actor)
call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %hdl, i1 0)
unreachable
Expand All @@ -120,6 +123,7 @@ define void @my_async_function_pa(i8* %ctxt, %async.task* %task, %async.actor* %
; CHECK: @my_async_function2_fp = constant <{ i32, i32 }> <{ {{.*}}, i32 176 }

; CHECK-LABEL: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor)
; CHECK-O0-LABEL: define swiftcc void @my_async_function(i8* swiftasync %async.ctxt, %async.task* %task, %async.actor* %actor)
; CHECK-SAME: !dbg ![[SP1:[0-9]+]] {
; CHECK: entry:
; CHECK: [[FRAMEPTR:%.*]] = getelementptr inbounds i8, i8* %async.ctxt, i64 128
Expand All @@ -145,16 +149,23 @@ define void @my_async_function_pa(i8* %ctxt, %async.task* %task, %async.actor* %
; CHECK: store i8* bitcast (void (i8*, i8*, i8*)* @my_async_function.resume.0 to i8*), i8** [[RETURN_TO_CALLER_ADDR]]
; CHECK: [[CALLER_CONTEXT_ADDR:%.*]] = bitcast i8* [[CALLEE_CTXT]] to i8**
; CHECK: store i8* %async.ctxt, i8** [[CALLER_CONTEXT_ADDR]]
; Make sure the spill is underaligned to the max context alignment (16).
; CHECK-O0: [[VECTOR_SPILL:%.*]] = load <4 x double>, <4 x double>* {{.*}}
; CHECK-O0: [[VECTOR_SPILL_ADDR:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* {{.*}}, i32 0, i32 1
; CHECK-O0: store <4 x double> [[VECTOR_SPILL]], <4 x double>* [[VECTOR_SPILL_ADDR]], align 16
; CHECK: tail call swiftcc void @asyncSuspend(i8* [[CALLEE_CTXT]], %async.task* %task, %async.actor* %actor)
; CHECK: ret void
; CHECK: }

; CHECK-LABEL: define internal swiftcc void @my_async_function.resume.0(i8* nocapture readonly swiftasync %0, i8* %1, i8* nocapture readnone %2)
; CHECK-O0-LABEL: define internal swiftcc void @my_async_function.resume.0(i8* swiftasync %0, i8* %1, i8* %2)
; CHECK-SAME: !dbg ![[SP2:[0-9]+]] {
; CHECK: entryresume.0:
; CHECK: [[CALLER_CONTEXT_ADDR:%.*]] = bitcast i8* %0 to i8**
; CHECK: [[CALLER_CONTEXT:%.*]] = load i8*, i8** [[CALLER_CONTEXT_ADDR]]
; CHECK: [[FRAME_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 128
; CHECK-O0: [[VECTOR_SPILL_ADDR:%.*]] = getelementptr inbounds %my_async_function.Frame, %my_async_function.Frame* {{.*}}, i32 0, i32 1
; CHECK-O0: load <4 x double>, <4 x double>* [[VECTOR_SPILL_ADDR]], align 16
; CHECK: [[CALLEE_CTXT_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* [[CALLER_CONTEXT]], i64 160
; CHECK: [[CAST1:%.*]] = bitcast i8* [[CALLEE_CTXT_SPILL_ADDR]] to i8**
; CHECK: [[CALLEE_CTXT_RELOAD:%.*]] = load i8*, i8** [[CAST1]]
Expand Down