Skip to content

Commit b08450d

Browse files
committed
[AArch64] Skip storing of immutable stack arguments in tail calls
1 parent 82bd148 commit b08450d

File tree

5 files changed

+126
-57
lines changed

5 files changed

+126
-57
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9328,10 +9328,25 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
93289328
}
93299329
unsigned LocMemOffset = VA.getLocMemOffset();
93309330
int32_t Offset = LocMemOffset + BEAlign;
9331-
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
9332-
PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
93339331

93349332
if (IsTailCall) {
9333+
// When the frame pointer is perfectly aligned for the tail call and the
9334+
// same stack argument is passed down, omit storing it if it is immutable
9335+
// and already at the right offset.
9336+
if (FPDiff == 0) {
9337+
if (auto *LoadNode = dyn_cast<LoadSDNode>(Arg)) {
9338+
if (auto *FINode =
9339+
dyn_cast<FrameIndexSDNode>(LoadNode->getBasePtr())) {
9340+
MachineFrameInfo &MFI = MF.getFrameInfo();
9341+
int FI = FINode->getIndex();
9342+
if (LoadNode->getMemoryVT() == VA.getValVT() &&
9343+
MFI.isImmutableObjectIndex(FI) &&
9344+
Offset == MFI.getObjectOffset(FI))
9345+
continue;
9346+
}
9347+
}
9348+
}
9349+
93359350
Offset = Offset + FPDiff;
93369351
int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
93379352

llvm/test/CodeGen/AArch64/darwinpcs-tail.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55

66
; CHECK-LABEL: __ZThn16_N1C3addEPKcz:
77
; CHECK: b __ZN1C3addEPKcz
8+
89
; CHECK-LABEL: _tailTest:
910
; CHECK: b __ZN1C3addEPKcz
11+
1012
; CHECK-LABEL: __ZThn8_N1C1fEiiiiiiiiiz:
11-
; CHECK: ldr w9, [sp, #4]
12-
; CHECK: str w9, [sp, #4]
1313
; CHECK: b __ZN1C1fEiiiiiiiiiz
1414

1515
%class.C = type { %class.A.base, [4 x i8], %class.B.base, [4 x i8] }

llvm/test/CodeGen/AArch64/scavenge-large-call.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
; CHECK: add {{x[0-9]+}}, sp,
55

66
define void @caller(ptr %0, i16 %1, i16 %2, i8 %3, double %4, i16 %5, i8 %6, ptr %7, double %8, i32 %9, ptr %10, double %11, double %12, [2 x i64] %13, [2 x i64] %14, [2 x i64] %15, double %16, double %17, [2 x i64] %18, [2 x i64] %19, i16 %20, i32 %21, double %22, i8 %23, [2 x i64] %24, [2 x i64] %25, [2 x i64] %26, i8 %27, i16 %28, i16 %29, i16 %30, i32 %31, [2 x i64] %32, [2 x i64] %33, [2 x i64] %34, [2 x i64] %35, [2 x i64] %36, i32 %37, i32 %38) {
7-
tail call void @callee(ptr %0, i16 %1, i16 %2, i8 %3, double 0.000000e+00, i16 %5, i8 %6, ptr %7, double 0.000000e+00, i32 %9, ptr %10, double 0.000000e+00, double 0.000000e+00, [2 x i64] %13, [2 x i64] %14, [2 x i64] %15, double 0.000000e+00, double 0.000000e+00, [2 x i64] %18, [2 x i64] %19, i16 %20, i32 %21, double 0.000000e+00, i8 %23, [2 x i64] %24, [2 x i64] %25, [2 x i64] zeroinitializer, i8 %27, i16 0, i16 0, i16 %28, i32 0, [2 x i64] zeroinitializer, [2 x i64] zeroinitializer, [2 x i64] zeroinitializer, [2 x i64] %35, [2 x i64] %36, i32 0, i32 0)
7+
call void @callee(ptr %0, i16 %1, i16 %2, i8 %3, double 0.000000e+00, i16 %5, i8 %6, ptr %7, double 0.000000e+00, i32 %9, ptr %10, double 0.000000e+00, double 0.000000e+00, [2 x i64] %13, [2 x i64] %14, [2 x i64] %15, double 0.000000e+00, double 0.000000e+00, [2 x i64] %18, [2 x i64] %19, i16 %20, i32 %21, double 0.000000e+00, i8 %23, [2 x i64] %24, [2 x i64] %25, [2 x i64] zeroinitializer, i8 %27, i16 0, i16 0, i16 %28, i32 0, [2 x i64] zeroinitializer, [2 x i64] zeroinitializer, [2 x i64] zeroinitializer, [2 x i64] %35, [2 x i64] %36, i32 0, i32 0)
88
ret void
99
}
1010

llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll

Lines changed: 51 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -11,67 +11,66 @@ target triple = "aarch64-unknown-linux-gnu"
1111
define dso_local void @func1(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v6, ptr %v7, ptr %v8,
1212
; CHECK-LABEL: func1:
1313
; CHECK: // %bb.0:
14-
; CHECK-NEXT: str x29, [sp, #-48]! // 8-byte Folded Spill
15-
; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
16-
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
17-
; CHECK-NEXT: .cfi_def_cfa_offset 48
18-
; CHECK-NEXT: .cfi_offset w19, -8
19-
; CHECK-NEXT: .cfi_offset w20, -16
20-
; CHECK-NEXT: .cfi_offset w21, -24
21-
; CHECK-NEXT: .cfi_offset w22, -32
22-
; CHECK-NEXT: .cfi_offset w29, -48
14+
; CHECK-NEXT: sub sp, sp, #368
15+
; CHECK-NEXT: stp x29, x30, [sp, #336] // 16-byte Folded Spill
16+
; CHECK-NEXT: str x28, [sp, #352] // 8-byte Folded Spill
17+
; CHECK-NEXT: add x29, sp, #336
18+
; CHECK-NEXT: .cfi_def_cfa w29, 32
19+
; CHECK-NEXT: .cfi_offset w28, -16
20+
; CHECK-NEXT: .cfi_offset w30, -24
21+
; CHECK-NEXT: .cfi_offset w29, -32
2322
; CHECK-NEXT: ptrue p0.d
24-
; CHECK-NEXT: add x10, sp, #176
25-
; CHECK-NEXT: add x8, sp, #48
26-
; CHECK-NEXT: add x9, sp, #144
27-
; CHECK-NEXT: add x20, sp, #176
28-
; CHECK-NEXT: ldr x15, [sp, #104]
29-
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x10]
23+
; CHECK-NEXT: add x8, x29, #32
24+
; CHECK-NEXT: add x9, x29, #136
25+
; CHECK-NEXT: mov x12, #32 // =0x20
26+
; CHECK-NEXT: ldp x10, x11, [x29, #336]
3027
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
31-
; CHECK-NEXT: add x8, sp, #112
32-
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x9]
28+
; CHECK-NEXT: add x8, x29, #72
29+
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9]
3330
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x8]
34-
; CHECK-NEXT: ldur q4, [sp, #88]
35-
; CHECK-NEXT: ldp x9, x8, [sp, #328]
36-
; CHECK-NEXT: ldr x19, [sp, #272]
37-
; CHECK-NEXT: ldp x11, x10, [sp, #312]
38-
; CHECK-NEXT: ldp x13, x12, [sp, #296]
39-
; CHECK-NEXT: ldp x18, x14, [sp, #280]
40-
; CHECK-NEXT: ldp x16, x17, [sp, #208]
41-
; CHECK-NEXT: ldp x21, x22, [sp, #352]
42-
; CHECK-NEXT: st1d { z3.d }, p0, [x20]
43-
; CHECK-NEXT: add x20, sp, #144
44-
; CHECK-NEXT: st1d { z2.d }, p0, [x20]
45-
; CHECK-NEXT: add x20, sp, #112
46-
; CHECK-NEXT: st1d { z1.d }, p0, [x20]
47-
; CHECK-NEXT: add x20, sp, #48
48-
; CHECK-NEXT: st1d { z0.d }, p0, [x20]
49-
; CHECK-NEXT: stp x21, x22, [sp, #352]
50-
; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
51-
; CHECK-NEXT: stp x19, x18, [sp, #272]
52-
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
53-
; CHECK-NEXT: stp x16, x17, [sp, #208]
54-
; CHECK-NEXT: stur q4, [sp, #88]
55-
; CHECK-NEXT: str x15, [sp, #104]
56-
; CHECK-NEXT: stp x14, x13, [sp, #288]
57-
; CHECK-NEXT: stp x12, x11, [sp, #304]
58-
; CHECK-NEXT: stp x10, x9, [sp, #320]
59-
; CHECK-NEXT: str x8, [sp, #336]
60-
; CHECK-NEXT: ldr x29, [sp], #48 // 8-byte Folded Reload
61-
; CHECK-NEXT: b func2
31+
; CHECK-NEXT: add x8, x29, #104
32+
; CHECK-NEXT: add x9, x29, #288
33+
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x8]
34+
; CHECK-NEXT: add x8, x29, #168
35+
; CHECK-NEXT: ld1d { z6.d }, p0/z, [x9]
36+
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x8]
37+
; CHECK-NEXT: add x8, x29, #256
38+
; CHECK-NEXT: ldr x9, [x29, #320]
39+
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x8]
40+
; CHECK-NEXT: ldr x8, [x29, #200]
41+
; CHECK-NEXT: st1d { z6.d }, p0, [sp, x12, lsl #3]
42+
; CHECK-NEXT: mov x12, #28 // =0x1c
43+
; CHECK-NEXT: st1d { z5.d }, p0, [sp, x12, lsl #3]
44+
; CHECK-NEXT: mov x12, #17 // =0x11
45+
; CHECK-NEXT: st1d { z4.d }, p0, [sp, x12, lsl #3]
46+
; CHECK-NEXT: mov x12, #13 // =0xd
47+
; CHECK-NEXT: st1d { z3.d }, p0, [sp, x12, lsl #3]
48+
; CHECK-NEXT: mov x12, #9 // =0x9
49+
; CHECK-NEXT: st1d { z2.d }, p0, [sp, x12, lsl #3]
50+
; CHECK-NEXT: mov x12, #5 // =0x5
51+
; CHECK-NEXT: st1d { z1.d }, p0, [sp, x12, lsl #3]
52+
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
53+
; CHECK-NEXT: stp x10, x11, [sp, #304]
54+
; CHECK-NEXT: str x9, [sp, #288]
55+
; CHECK-NEXT: str x8, [sp, #168]
56+
; CHECK-NEXT: bl func2
57+
; CHECK-NEXT: ldp x29, x30, [sp, #336] // 16-byte Folded Reload
58+
; CHECK-NEXT: ldr x28, [sp, #352] // 8-byte Folded Reload
59+
; CHECK-NEXT: add sp, sp, #368
60+
; CHECK-NEXT: ret
6261
ptr %v9, ptr %v10, ptr %v11, ptr %v12, ptr %v13, ptr %v14, ptr %v15, ptr %v16,
6362
ptr %v17, ptr %v18, ptr %v19, ptr %v20, ptr %v21, ptr %v22, ptr %v23, ptr %v24,
6463
ptr %v25, ptr %v26, ptr %v27, ptr %v28, ptr %v29, ptr %v30, ptr %v31, ptr %v32,
6564
ptr %v33, ptr %v34, ptr %v35, ptr %v36, ptr %v37, ptr %v38, ptr %v39, ptr %v40,
6665
ptr %v41, ptr %v42, ptr %v43, ptr %v44, ptr %v45, ptr %v46, ptr %v47, ptr %v48,
6766
i64 %v49) #0 {
68-
tail call void @func2(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v6, ptr %v7, ptr %v8,
69-
ptr %v9, ptr %v10, ptr %v11, ptr %v12, ptr undef, ptr %v14, ptr %v15, ptr %v16,
70-
ptr %v17, ptr %v18, ptr %v19, ptr %v20, ptr %v21, ptr %v22, ptr %v23, ptr %v24,
71-
ptr %v25, ptr %v26, ptr %v27, ptr %v28, ptr %v29, ptr %v30, ptr undef, ptr undef,
72-
ptr undef, ptr undef, ptr undef, ptr undef, ptr %v37, ptr %v38, ptr %v39, ptr %v40,
73-
ptr %v41, ptr %v42, ptr %v43, ptr %v44, ptr %v45, ptr undef, ptr %v47, ptr %v48,
74-
i64 undef)
67+
call void @func2(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v6, ptr %v7, ptr %v8,
68+
ptr %v9, ptr %v10, ptr %v11, ptr %v12, ptr undef, ptr %v14, ptr %v15, ptr %v16,
69+
ptr %v17, ptr %v18, ptr %v19, ptr %v20, ptr %v21, ptr %v22, ptr %v23, ptr %v24,
70+
ptr %v25, ptr %v26, ptr %v27, ptr %v28, ptr %v29, ptr %v30, ptr undef, ptr undef,
71+
ptr undef, ptr undef, ptr undef, ptr undef, ptr %v37, ptr %v38, ptr %v39, ptr %v40,
72+
ptr %v41, ptr %v42, ptr %v43, ptr %v44, ptr %v45, ptr undef, ptr %v47, ptr %v48,
73+
i64 undef)
7574
ret void
7675
}
7776

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; RUN: llc %s -o - | FileCheck %s
2+
3+
; Tail calls which have stack arguments at the same offsets as the caller do not
4+
; need to load and store the arguments from the stack.
5+
6+
target triple = "aarch64-none-linux-gnu"
7+
8+
declare i32 @func(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j)
9+
10+
; CHECK-LABEL: wrapper_func:
11+
define i32 @wrapper_func(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j) {
12+
; CHECK: // %bb.
13+
; CHECK-NEXT: b func
14+
%call = tail call i32 @func(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j)
15+
ret i32 %call
16+
}
17+
18+
; CHECK-LABEL: wrapper_func_zero_arg:
19+
define i32 @wrapper_func_zero_arg(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j) {
20+
; CHECK: // %bb.
21+
; CHECK-NEXT: mov w0, wzr
22+
; CHECK-NEXT: b func
23+
%call = tail call i32 @func(i32 0, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j)
24+
ret i32 %call
25+
}
26+
27+
; CHECK-LABEL: wrapper_func_zero_stack_arg:
28+
define i32 @wrapper_func_zero_stack_arg(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j) {
29+
; CHECK: // %bb.
30+
; CHECK-NEXT: str wzr, [sp, #8]
31+
; CHECK-NEXT: b func
32+
%call = tail call i32 @func(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 0)
33+
ret i32 %call
34+
}
35+
36+
; CHECK-LABEL: wrapper_func_overriden_arg:
37+
define i32 @wrapper_func_overriden_arg(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j) {
38+
; CHECK: // %bb.
39+
; CHECK-NEXT: mov w1, w0
40+
; CHECK-NEXT: mov w0, wzr
41+
; CHECK-NEXT: b func
42+
%call = tail call i32 @func(i32 0, i32 %a, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j)
43+
ret i32 %call
44+
}
45+
46+
; CHECK-LABEL: wrapper_func_overriden_stack_arg:
47+
define i32 @wrapper_func_overriden_stack_arg(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j) {
48+
; CHECK: // %bb.
49+
; CHECK-NEXT: ldr w8, [sp]
50+
; CHECK-NEXT: str wzr, [sp]
51+
; CHECK-NEXT: str w8, [sp, #8]
52+
; CHECK-NEXT: b func
53+
%call = tail call i32 @func(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 0, i32 %i)
54+
ret i32 %call
55+
}

0 commit comments

Comments
 (0)