Skip to content

Commit 2cbbd25

Browse files
committed
[Inliner] Fix Issue #45778: Inliner now respects the alignment of parameters passed by value
1 parent 7581aa1 commit 2cbbd25

File tree

6 files changed

+26
-19
lines changed

6 files changed

+26
-19
lines changed

llvm/lib/Transforms/Utils/InlineFunction.cpp

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1703,19 +1703,21 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
17031703
}
17041704

17051705
static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
1706-
Module *M, BasicBlock *InsertBlock,
1706+
MaybeAlign SrcAlign, Module *M,
1707+
BasicBlock *InsertBlock,
17071708
InlineFunctionInfo &IFI,
17081709
Function *CalledFunc) {
17091710
IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
17101711

17111712
Value *Size =
17121713
Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType));
17131714

1714-
// Always generate a memcpy of alignment 1 here because we don't know
1715-
// the alignment of the src pointer. Other optimizations can infer
1716-
// better alignment.
1717-
CallInst *CI = Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
1718-
/*SrcAlign*/ Align(1), Size);
1715+
Align DstAlign = Dst->getPointerAlignment(M->getDataLayout());
1716+
1717+
// Generate a mempcpy with the correct alignments. At this point, the
1718+
// alignment of the src pointer is unknown. Therefore its alignment has to be
1719+
// passed in from the creation of the src ptr.
1720+
CallInst *CI = Builder.CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size);
17191721

17201722
// The verifier requires that all calls of debug-info-bearing functions
17211723
// from debug-info-bearing functions have a debug location (for inlining
@@ -2629,9 +2631,12 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
26292631
struct ByValInit {
26302632
Value *Dst;
26312633
Value *Src;
2634+
MaybeAlign SrcAlign;
26322635
Type *Ty;
26332636
};
2634-
// Keep a list of pair (dst, src) to emit byval initializations.
2637+
// Keep a list of tuples (dst, src, src_align) to emit byval
2638+
// initializations. Src Alignment is only available though the callbase,
2639+
// therefore has to be saved.
26352640
SmallVector<ByValInit, 4> ByValInits;
26362641

26372642
// When inlining a function that contains noalias scope metadata,
@@ -2661,8 +2666,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
26612666
&CB, CalledFunc, IFI,
26622667
CalledFunc->getParamAlign(ArgNo));
26632668
if (ActualArg != *AI)
2664-
ByValInits.push_back(
2665-
{ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)});
2669+
ByValInits.push_back({ActualArg, (Value *)*AI,
2670+
CB.getParamAlign(ArgNo),
2671+
CB.getParamByValType(ArgNo)});
26662672
}
26672673

26682674
VMap[&*I] = ActualArg;
@@ -2712,8 +2718,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
27122718

27132719
// Inject byval arguments initialization.
27142720
for (ByValInit &Init : ByValInits)
2715-
HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(),
2716-
&*FirstNewBlock, IFI, CalledFunc);
2721+
HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Init.SrcAlign,
2722+
Caller->getParent(), &*FirstNewBlock, IFI,
2723+
CalledFunc);
27172724

27182725
std::optional<OperandBundleUse> ParentDeopt =
27192726
CB.getOperandBundle(LLVMContext::OB_deopt);

llvm/test/Transforms/Inline/byval-align.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ define void @byval_caller(ptr nocapture align 64 %a, ptr %b) #0 {
2929
; CHECK-NEXT: entry:
3030
; CHECK-NEXT: [[A1:%.*]] = alloca float, align 128
3131
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[A1]])
32-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[A1]], ptr align 1 [[A]], i64 4, i1 false)
32+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[A1]], ptr align 128 [[A]], i64 4, i1 false)
3333
; CHECK-NEXT: [[LOAD_I:%.*]] = load float, ptr [[A1]], align 4
3434
; CHECK-NEXT: [[B_IDX_I:%.*]] = getelementptr inbounds float, ptr [[B]], i64 8
3535
; CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[LOAD_I]], 2.000000e+00

llvm/test/Transforms/Inline/byval-with-non-alloca-addrspace.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ define i64 @foo(ptr %arg) {
2727
; CHECK-SAME: ptr [[ARG:%.*]]) {
2828
; CHECK-NEXT: [[ARG1:%.*]] = alloca [[STRUCT:%.*]], align 8
2929
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[ARG1]])
30-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[ARG1]], ptr align 1 [[ARG]], i64 16, i1 false)
30+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ARG1]], ptr align 8 [[ARG]], i64 16, i1 false)
3131
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT]], ptr [[ARG1]], i64 0, i32 1
3232
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 4
3333
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[ARG1]])

llvm/test/Transforms/Inline/byval.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ define i32 @test1() nounwind {
3636
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], ptr [[S]], i32 0, i32 1
3737
; CHECK-NEXT: store i64 2, ptr [[TMP4]], align 4
3838
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]])
39-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr align 1 [[S]], i64 12, i1 false)
39+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[S1]], ptr [[S]], i64 12, i1 false)
4040
; CHECK-NEXT: [[TMP1_I:%.*]] = load i32, ptr [[S1]], align 4
4141
; CHECK-NEXT: [[TMP2_I:%.*]] = add i32 [[TMP1_I]], 1
4242
; CHECK-NEXT: store i32 [[TMP2_I]], ptr [[S1]], align 4
@@ -105,7 +105,7 @@ define void @test3() nounwind {
105105
; CHECK-NEXT: [[S1:%.*]] = alloca [[STRUCT_SS:%.*]], align 64
106106
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS]], align 1
107107
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]])
108-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr align 1 [[S]], i64 12, i1 false)
108+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 64 [[S1]], ptr align 64 [[S]], i64 12, i1 false)
109109
; CHECK-NEXT: call void @g3(ptr align 64 [[S1]]) #[[ATTR0]]
110110
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[S1]])
111111
; CHECK-NEXT: ret void
@@ -158,7 +158,7 @@ define i32 @test5() {
158158
; CHECK-NEXT: entry:
159159
; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_S0:%.*]], align 8
160160
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[B]])
161-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[B]], ptr align 1 @b, i64 4, i1 false)
161+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[B]], ptr align 4 @b, i64 4, i1 false)
162162
; CHECK-NEXT: store i32 0, ptr @b, align 4
163163
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B]], align 4
164164
; CHECK-NEXT: store i32 [[TMP0]], ptr @a, align 4

llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ define i32 @main() {
5353
; CHECK-LABEL: define i32 @main() {
5454
; CHECK-NEXT: [[G_VAR:%.*]] = alloca [[STRUCT_A:%.*]], align 8
5555
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr [[G_VAR]])
56-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[G_VAR]], ptr align 1 @g_var, i64 20, i1 false)
56+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[G_VAR]], ptr align 8 @g_var, i64 20, i1 false)
5757
; CHECK-NEXT: [[VAL_I:%.*]] = load i32, ptr [[G_VAR]], align 8
5858
; CHECK-NEXT: [[DOTNOT_I:%.*]] = icmp eq i32 [[VAL_I]], 0
5959
; CHECK-NEXT: br i1 [[DOTNOT_I]], label [[CHECK_POINTERS_ARE_EQUAL_I:%.*]], label [[STORE_PTR_IN_GVAR_I:%.*]]

llvm/test/Transforms/Inline/inline-tail.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ define void @test_byval_a(ptr byval(i32) %p) {
6565
; CHECK-SAME: (ptr byval(i32) [[P:%.*]]) {
6666
; CHECK-NEXT: [[P1:%.*]] = alloca i32, align 4
6767
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[P1]])
68-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[P1]], ptr align 1 [[P]], i64 4, i1 false)
68+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1]], ptr [[P]], i64 4, i1 false)
6969
; CHECK-NEXT: musttail call void @test_byval_c(ptr byval(i32) [[P1]])
7070
; CHECK-NEXT: ret void
7171
;
@@ -88,7 +88,7 @@ define void @test_dynalloca_a(ptr byval(i32) %p, i32 %n) {
8888
; CHECK-NEXT: [[P1:%.*]] = alloca i32, align 4
8989
; CHECK-NEXT: [[SAVEDSTACK:%.*]] = call ptr @llvm.stacksave.p0()
9090
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[P1]])
91-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[P1]], ptr align 1 [[P]], i64 4, i1 false)
91+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1]], ptr [[P]], i64 4, i1 false)
9292
; CHECK-NEXT: [[BUF_I:%.*]] = alloca i8, i32 [[N]], align 1
9393
; CHECK-NEXT: call void @escape(ptr [[BUF_I]])
9494
; CHECK-NEXT: musttail call void @test_dynalloca_c(ptr byval(i32) [[P1]], i32 [[N]])

0 commit comments

Comments
 (0)