Skip to content

Commit 0e63180

Browse files
authored
Reapply "[Aarch64] [ISel] Don't save vaargs registers if vaargs are unused" (#131459)
This reapplies the original commit e122483. This is a second attempt, with Win64 removed from the scope, since it was somehow broken by the original commit. ### Original PR description If varargs are not used, there is no need to save the vararg registers. LLVM already implements this optimization for x86, as does gcc [1]. Some ABI tests are kept almost as-is, except for stack offsets, by just adding llvm.va_start. Only the aapcs_vararg_frame.ll test was rewritten to match the new behavior. [1] https://godbolt.org/z/GWWKr8xMd
1 parent 5c02f1a commit 0e63180

File tree

5 files changed

+82
-25
lines changed

5 files changed

+82
-25
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8241,7 +8241,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
82418241
}
82428242

82438243
// varargs
8244-
if (isVarArg) {
8244+
// Note that the IsWin64 part is required to prevent odd miscompilations on arm64
8245+
// Windows platforms. For more info, refer to the GH#126780 PR comments.
8246+
if (isVarArg &&
8247+
(DAG.getMachineFunction().getFrameInfo().hasVAStart() || IsWin64)) {
82458248
if (!Subtarget->isTargetDarwin() || IsWin64) {
82468249
// The AAPCS variadic function ABI is identical to the non-variadic
82478250
// one. As a result there may be more arguments in registers and we should

llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,11 @@
22
; RUN: llc < %s --global-isel=0 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s
33
; RUN: llc < %s --global-isel=1 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s --check-prefix=GISEL
44

5+
%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
6+
57
define void @va(i32 %count, half %f, ...) nounwind {
68
; CHECK-LABEL: va:
79
; CHECK: // %bb.0: // %entry
8-
; CHECK-NEXT: sub sp, sp, #176
9-
; CHECK-NEXT: stp x2, x3, [sp, #128]
10-
; CHECK-NEXT: str x1, [sp, #120]
11-
; CHECK-NEXT: stp x4, x5, [sp, #144]
12-
; CHECK-NEXT: stp x6, x7, [sp, #160]
13-
; CHECK-NEXT: stp q1, q2, [sp]
14-
; CHECK-NEXT: stp q3, q4, [sp, #32]
15-
; CHECK-NEXT: stp q5, q6, [sp, #64]
16-
; CHECK-NEXT: str q7, [sp, #96]
17-
; CHECK-NEXT: add sp, sp, #176
1810
; CHECK-NEXT: ret
1911
;
2012
; GISEL-LABEL: va:
@@ -33,3 +25,55 @@ define void @va(i32 %count, half %f, ...) nounwind {
3325
entry:
3426
ret void
3527
}
28+
29+
define void @va_used(i32 %count, half %f, ...) nounwind {
30+
; CHECK-LABEL: va_used:
31+
; CHECK: // %bb.0:
32+
; CHECK-NEXT: sub sp, sp, #208
33+
; CHECK-NEXT: mov x8, #-56 // =0xffffffffffffffc8
34+
; CHECK-NEXT: mov x9, sp
35+
; CHECK-NEXT: add x10, sp, #120
36+
; CHECK-NEXT: movk x8, #65424, lsl #32
37+
; CHECK-NEXT: add x9, x9, #112
38+
; CHECK-NEXT: stp x2, x3, [sp, #128]
39+
; CHECK-NEXT: stp x9, x8, [sp, #192]
40+
; CHECK-NEXT: add x8, x10, #56
41+
; CHECK-NEXT: add x9, sp, #208
42+
; CHECK-NEXT: str x1, [sp, #120]
43+
; CHECK-NEXT: stp x4, x5, [sp, #144]
44+
; CHECK-NEXT: stp x6, x7, [sp, #160]
45+
; CHECK-NEXT: stp q1, q2, [sp]
46+
; CHECK-NEXT: stp q3, q4, [sp, #32]
47+
; CHECK-NEXT: stp q5, q6, [sp, #64]
48+
; CHECK-NEXT: str q7, [sp, #96]
49+
; CHECK-NEXT: stp x9, x8, [sp, #176]
50+
; CHECK-NEXT: add sp, sp, #208
51+
; CHECK-NEXT: ret
52+
;
53+
; GISEL-LABEL: va_used:
54+
; GISEL: // %bb.0:
55+
; GISEL-NEXT: sub sp, sp, #208
56+
; GISEL-NEXT: mov x8, sp
57+
; GISEL-NEXT: add x9, sp, #208
58+
; GISEL-NEXT: add x10, sp, #208
59+
; GISEL-NEXT: stp x9, x10, [x8]
60+
; GISEL-NEXT: add x9, sp, #144
61+
; GISEL-NEXT: mov w10, #-112 // =0xffffff90
62+
; GISEL-NEXT: str x9, [x8, #16]
63+
; GISEL-NEXT: mov w9, #-56 // =0xffffffc8
64+
; GISEL-NEXT: stp x1, x2, [sp, #152]
65+
; GISEL-NEXT: stp x3, x4, [sp, #168]
66+
; GISEL-NEXT: stp x5, x6, [sp, #184]
67+
; GISEL-NEXT: str x7, [sp, #200]
68+
; GISEL-NEXT: stp q1, q2, [sp, #32]
69+
; GISEL-NEXT: stp q3, q4, [sp, #64]
70+
; GISEL-NEXT: stp q5, q6, [sp, #96]
71+
; GISEL-NEXT: str q7, [sp, #128]
72+
; GISEL-NEXT: stp w9, w10, [x8, #24]
73+
; GISEL-NEXT: add sp, sp, #208
74+
; GISEL-NEXT: ret
75+
%valist = alloca %struct.__va_list
76+
call void @llvm.va_start(ptr %valist)
77+
call void @llvm.va_end(ptr %valist)
78+
ret void
79+
}

llvm/test/CodeGen/AArch64/alloca.ll

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ define i64 @test_alloca_with_local(i64 %n) {
6262
; CHECK: ret
6363
}
6464

65+
%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
66+
6567
define void @test_variadic_alloca(i64 %n, ...) {
6668
; CHECK-LABEL: test_variadic_alloca:
6769

@@ -77,31 +79,33 @@ define void @test_variadic_alloca(i64 %n, ...) {
7779

7880
; CHECK: stp x29, x30, [sp, #-16]!
7981
; CHECK: mov x29, sp
80-
; CHECK: sub sp, sp, #192
81-
; CHECK-DAG: stp q6, q7, [x29, #-96]
82+
; CHECK: sub sp, sp, #224
83+
; CHECK-DAG: stp q6, q7, [x29, #-128]
8284
; [...]
83-
; CHECK-DAG: stp q0, q1, [x29, #-192]
85+
; CHECK-DAG: stp q2, q3, [x29, #-192]
8486

85-
; CHECK-DAG: stp x5, x6, [x29, #-24]
87+
; CHECK-DAG: stp x5, x6, [x29, #-56]
8688
; [...]
87-
; CHECK-DAG: stp x1, x2, [x29, #-56]
89+
; CHECK-DAG: stp x1, x2, [x29, #-88]
8890

8991
; CHECK-NOFP-ARM64: stp x29, x30, [sp, #-16]!
9092
; CHECK-NOFP-ARM64: mov x29, sp
91-
; CHECK-NOFP-ARM64: sub sp, sp, #64
92-
; CHECK-NOFP-ARM64-DAG: stp x5, x6, [x29, #-24]
93+
; CHECK-NOFP-ARM64: sub sp, sp, #16
94+
; CHECK-NOFP-ARM64-DAG: stp x5, x6, [x29, #-56]
9395
; [...]
94-
; CHECK-NOFP-ARM64-DAG: stp x3, x4, [x29, #-40]
96+
; CHECK-NOFP-ARM64-DAG: stp x3, x4, [x29, #-72]
9597
; [...]
96-
; CHECK-NOFP-ARM64-DAG: stp x1, x2, [x29, #-56]
98+
; CHECK-NOFP-ARM64-DAG: stp x1, x2, [x29, #-88]
9799
; [...]
98100
; CHECK-NOFP-ARM64: mov x8, sp
99-
101+
%valist = alloca %struct.__va_list
102+
call void @llvm.va_start(ptr %valist)
100103
%addr = alloca i8, i64 %n
101104

102105
call void @use_addr(ptr %addr)
103106
; CHECK: bl use_addr
104107

108+
call void @llvm.va_end(ptr %valist)
105109
ret void
106110

107111
; CHECK-NOFP-AARCH64: sub sp, x29, #64

llvm/test/CodeGen/AArch64/darwinpcs-tail.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
; CHECK-LABEL: _tailTest:
99
; CHECK: b __ZN1C3addEPKcz
1010
; CHECK-LABEL: __ZThn8_N1C1fEiiiiiiiiiz:
11-
; CHECK: ldr w9, [sp, #4]
12-
; CHECK: str w9, [sp, #4]
11+
; CHECK: ldr w8, [sp, #4]
12+
; CHECK: str w8, [sp, #4]
1313
; CHECK: b __ZN1C1fEiiiiiiiiiz
1414

1515
%class.C = type { %class.A.base, [4 x i8], %class.B.base, [4 x i8] }

llvm/test/CodeGen/AArch64/vararg-tallcall.ll

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,22 @@ target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"
88

99
%class.X = type { i8 }
1010
%struct.B = type { ptr }
11+
%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
1112

1213
$"??_9B@@$BA@AA" = comdat any
1314

1415
; Function Attrs: noinline optnone
1516
define linkonce_odr void @"??_9B@@$BA@AA"(ptr %this, ...) #1 comdat align 2 {
1617
entry:
18+
%valist = alloca %struct.__va_list
19+
call void @llvm.va_start.p0(ptr %valist)
1720
%this.addr = alloca ptr, align 8
1821
store ptr %this, ptr %this.addr, align 8
1922
%this1 = load ptr, ptr %this.addr, align 8
2023
call void asm sideeffect "", "~{d0}"()
2124
%vtable = load ptr, ptr %this1, align 8
2225
%0 = load ptr, ptr %vtable, align 8
26+
call void @llvm.va_end.p0(ptr %valist)
2327
musttail call void (ptr, ...) %0(ptr %this1, ...)
2428
ret void
2529
; No predecessors!
@@ -36,7 +40,9 @@ attributes #1 = { noinline optnone "thunk" }
3640
; CHECK-EC: mov v7.16b, v0.16b
3741
; CHECK-EC: ldr x9, [x0]
3842
; CHECK-EC: ldr x11, [x9]
43+
; CHECK-EC: add x4, sp, #96
3944
; CHECK-EC: mov v0.16b, v7.16b
40-
; CHECK-EC: add x4, sp, #64
41-
; CHECK-EC: add sp, sp, #64
45+
; CHECK-EC: add x4, sp, #96
46+
; CHECK-EC: ldr x30, [sp, #48]
47+
; CHECK-EC: add sp, sp, #96
4248
; CHECK-EC: br x11

0 commit comments

Comments
 (0)