Skip to content

Commit 000c9e9

Browse files
salltoIanWood1
authored andcommitted
[Inliner] Preserve alignment of byval arguments (llvm#137455)
Previously the inliner always produced a memcpy with alignment 1 for src and destination, leading to potentially suboptimal Codegen. Since the Src ptr alignment is only available through the CallBase it has to be passed to HandleByValArgumentInit. Dst Alignment is already known so it doesn't have to be passed along. If there is no specified Src Alignment my changes cause the ptr to have no align data attached instead of align 1 as before (see inline-tail.ll), I believe this is fine but since I'm a first time contributor, please confirm. My changes are already covered by 4 existing regression tests, so I did not add any additional ones. The example from llvm#45778 now results in: ```C opt -S -passes=inline,instcombine,sroa,instcombine test.ll define dso_local i32 @test(ptr %t) { entry: %.sroa.0.0.copyload = load ptr, ptr %t, align 8 # this used to be align 1 in the original issue %arrayidx.i = getelementptr inbounds nuw i8, ptr %.sroa.0.0.copyload, i64 24 %0 = load i32, ptr %arrayidx.i, align 4 ret i32 %0 } ``` Fixes llvm#45778.
1 parent 72c1790 commit 000c9e9

File tree

6 files changed

+24
-19
lines changed

6 files changed

+24
-19
lines changed

llvm/lib/Transforms/Utils/InlineFunction.cpp

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1703,19 +1703,19 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
17031703
}
17041704

17051705
static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
1706-
Module *M, BasicBlock *InsertBlock,
1706+
MaybeAlign SrcAlign, Module *M,
1707+
BasicBlock *InsertBlock,
17071708
InlineFunctionInfo &IFI,
17081709
Function *CalledFunc) {
17091710
IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
17101711

17111712
Value *Size =
17121713
Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType));
17131714

1714-
// Always generate a memcpy of alignment 1 here because we don't know
1715-
// the alignment of the src pointer. Other optimizations can infer
1716-
// better alignment.
1717-
CallInst *CI = Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
1718-
/*SrcAlign*/ Align(1), Size);
1715+
Align DstAlign = Dst->getPointerAlignment(M->getDataLayout());
1716+
1717+
// Generate a memcpy with the correct alignments.
1718+
CallInst *CI = Builder.CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size);
17191719

17201720
// The verifier requires that all calls of debug-info-bearing functions
17211721
// from debug-info-bearing functions have a debug location (for inlining
@@ -2629,9 +2629,12 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
26292629
struct ByValInit {
26302630
Value *Dst;
26312631
Value *Src;
2632+
MaybeAlign SrcAlign;
26322633
Type *Ty;
26332634
};
2634-
// Keep a list of pair (dst, src) to emit byval initializations.
2635+
// Keep a list of tuples (dst, src, src_align) to emit byval
2636+
// initializations. Src Alignment is only available though the callbase,
2637+
// therefore has to be saved.
26352638
SmallVector<ByValInit, 4> ByValInits;
26362639

26372640
// When inlining a function that contains noalias scope metadata,
@@ -2661,8 +2664,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
26612664
&CB, CalledFunc, IFI,
26622665
CalledFunc->getParamAlign(ArgNo));
26632666
if (ActualArg != *AI)
2664-
ByValInits.push_back(
2665-
{ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)});
2667+
ByValInits.push_back({ActualArg, (Value *)*AI,
2668+
CB.getParamAlign(ArgNo),
2669+
CB.getParamByValType(ArgNo)});
26662670
}
26672671

26682672
VMap[&*I] = ActualArg;
@@ -2712,8 +2716,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
27122716

27132717
// Inject byval arguments initialization.
27142718
for (ByValInit &Init : ByValInits)
2715-
HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(),
2716-
&*FirstNewBlock, IFI, CalledFunc);
2719+
HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Init.SrcAlign,
2720+
Caller->getParent(), &*FirstNewBlock, IFI,
2721+
CalledFunc);
27172722

27182723
std::optional<OperandBundleUse> ParentDeopt =
27192724
CB.getOperandBundle(LLVMContext::OB_deopt);

llvm/test/Transforms/Inline/byval-align.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ define void @byval_caller(ptr nocapture align 64 %a, ptr %b) #0 {
2929
; CHECK-NEXT: entry:
3030
; CHECK-NEXT: [[A1:%.*]] = alloca float, align 128
3131
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[A1]])
32-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[A1]], ptr align 1 [[A]], i64 4, i1 false)
32+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[A1]], ptr align 128 [[A]], i64 4, i1 false)
3333
; CHECK-NEXT: [[LOAD_I:%.*]] = load float, ptr [[A1]], align 4
3434
; CHECK-NEXT: [[B_IDX_I:%.*]] = getelementptr inbounds float, ptr [[B]], i64 8
3535
; CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[LOAD_I]], 2.000000e+00

llvm/test/Transforms/Inline/byval-with-non-alloca-addrspace.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ define i64 @foo(ptr %arg) {
2727
; CHECK-SAME: ptr [[ARG:%.*]]) {
2828
; CHECK-NEXT: [[ARG1:%.*]] = alloca [[STRUCT:%.*]], align 8
2929
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[ARG1]])
30-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[ARG1]], ptr align 1 [[ARG]], i64 16, i1 false)
30+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ARG1]], ptr align 8 [[ARG]], i64 16, i1 false)
3131
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT]], ptr [[ARG1]], i64 0, i32 1
3232
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 4
3333
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[ARG1]])

llvm/test/Transforms/Inline/byval.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ define i32 @test1() nounwind {
3636
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], ptr [[S]], i32 0, i32 1
3737
; CHECK-NEXT: store i64 2, ptr [[TMP4]], align 4
3838
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]])
39-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr align 1 [[S]], i64 12, i1 false)
39+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[S1]], ptr [[S]], i64 12, i1 false)
4040
; CHECK-NEXT: [[TMP1_I:%.*]] = load i32, ptr [[S1]], align 4
4141
; CHECK-NEXT: [[TMP2_I:%.*]] = add i32 [[TMP1_I]], 1
4242
; CHECK-NEXT: store i32 [[TMP2_I]], ptr [[S1]], align 4
@@ -105,7 +105,7 @@ define void @test3() nounwind {
105105
; CHECK-NEXT: [[S1:%.*]] = alloca [[STRUCT_SS:%.*]], align 64
106106
; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS]], align 1
107107
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]])
108-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr align 1 [[S]], i64 12, i1 false)
108+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 64 [[S1]], ptr align 64 [[S]], i64 12, i1 false)
109109
; CHECK-NEXT: call void @g3(ptr align 64 [[S1]]) #[[ATTR0]]
110110
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[S1]])
111111
; CHECK-NEXT: ret void
@@ -158,7 +158,7 @@ define i32 @test5() {
158158
; CHECK-NEXT: entry:
159159
; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_S0:%.*]], align 8
160160
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[B]])
161-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[B]], ptr align 1 @b, i64 4, i1 false)
161+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[B]], ptr align 4 @b, i64 4, i1 false)
162162
; CHECK-NEXT: store i32 0, ptr @b, align 4
163163
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B]], align 4
164164
; CHECK-NEXT: store i32 [[TMP0]], ptr @a, align 4

llvm/test/Transforms/Inline/inline-deferred-instsimplify.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ define i32 @main() {
5353
; CHECK-LABEL: define i32 @main() {
5454
; CHECK-NEXT: [[G_VAR:%.*]] = alloca [[STRUCT_A:%.*]], align 8
5555
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 20, ptr [[G_VAR]])
56-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[G_VAR]], ptr align 1 @g_var, i64 20, i1 false)
56+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[G_VAR]], ptr align 8 @g_var, i64 20, i1 false)
5757
; CHECK-NEXT: [[VAL_I:%.*]] = load i32, ptr [[G_VAR]], align 8
5858
; CHECK-NEXT: [[DOTNOT_I:%.*]] = icmp eq i32 [[VAL_I]], 0
5959
; CHECK-NEXT: br i1 [[DOTNOT_I]], label [[CHECK_POINTERS_ARE_EQUAL_I:%.*]], label [[STORE_PTR_IN_GVAR_I:%.*]]

llvm/test/Transforms/Inline/inline-tail.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ define void @test_byval_a(ptr byval(i32) %p) {
6565
; CHECK-SAME: (ptr byval(i32) [[P:%.*]]) {
6666
; CHECK-NEXT: [[P1:%.*]] = alloca i32, align 4
6767
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[P1]])
68-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[P1]], ptr align 1 [[P]], i64 4, i1 false)
68+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1]], ptr [[P]], i64 4, i1 false)
6969
; CHECK-NEXT: musttail call void @test_byval_c(ptr byval(i32) [[P1]])
7070
; CHECK-NEXT: ret void
7171
;
@@ -88,7 +88,7 @@ define void @test_dynalloca_a(ptr byval(i32) %p, i32 %n) {
8888
; CHECK-NEXT: [[P1:%.*]] = alloca i32, align 4
8989
; CHECK-NEXT: [[SAVEDSTACK:%.*]] = call ptr @llvm.stacksave.p0()
9090
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[P1]])
91-
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[P1]], ptr align 1 [[P]], i64 4, i1 false)
91+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[P1]], ptr [[P]], i64 4, i1 false)
9292
; CHECK-NEXT: [[BUF_I:%.*]] = alloca i8, i32 [[N]], align 1
9393
; CHECK-NEXT: call void @escape(ptr [[BUF_I]])
9494
; CHECK-NEXT: musttail call void @test_dynalloca_c(ptr byval(i32) [[P1]], i32 [[N]])

0 commit comments

Comments
 (0)