-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Reapply "[Aarch64] [ISel] Don't save vaargs registers if vaargs are unused" #131459
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel Author: Pavel Skripkin (pskrgag) ChangesThis reapplies original commit e122483 Second try, but with win64 removed from the scope, since it was somehow broken by original commit. Original PR descriptionIf vaargs are not used there is no need to save them. LLVM already implements such optimization for x86, as well as gcc [1]. Some ABI tests are kept almost as-is, except for stack offsets, by just adding llvm.va_start. Only laapcs_vararg_frame.ll test was rewritten to match new behavior. [1] https://godbolt.org/z/GWWKr8xMd Full diff: https://github.com/llvm/llvm-project/pull/131459.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a9b4965e32b4c..059789cddc342 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8230,7 +8230,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
}
// varargs
- if (isVarArg) {
+ // Note that IsWin64 part is required to prevent odd miscompilations on arm64 windows
+ // platforms. For more info refer to GH#126780 PR comments.
+ if (isVarArg && (DAG.getMachineFunction().getFrameInfo().hasVAStart() || IsWin64)) {
if (!Subtarget->isTargetDarwin() || IsWin64) {
// The AAPCS variadic function ABI is identical to the non-variadic
// one. As a result there may be more arguments in registers and we should
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll b/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll
index 7892e892a2412..6ea520ddcd6cd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll
@@ -2,19 +2,11 @@
; RUN: llc < %s --global-isel=0 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s
; RUN: llc < %s --global-isel=1 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s --check-prefix=GISEL
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
define void @va(i32 %count, half %f, ...) nounwind {
; CHECK-LABEL: va:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sub sp, sp, #176
-; CHECK-NEXT: stp x2, x3, [sp, #128]
-; CHECK-NEXT: str x1, [sp, #120]
-; CHECK-NEXT: stp x4, x5, [sp, #144]
-; CHECK-NEXT: stp x6, x7, [sp, #160]
-; CHECK-NEXT: stp q1, q2, [sp]
-; CHECK-NEXT: stp q3, q4, [sp, #32]
-; CHECK-NEXT: stp q5, q6, [sp, #64]
-; CHECK-NEXT: str q7, [sp, #96]
-; CHECK-NEXT: add sp, sp, #176
; CHECK-NEXT: ret
;
; GISEL-LABEL: va:
@@ -33,3 +25,55 @@ define void @va(i32 %count, half %f, ...) nounwind {
entry:
ret void
}
+
+define void @va_used(i32 %count, half %f, ...) nounwind {
+; CHECK-LABEL: va_used:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #208
+; CHECK-NEXT: mov x8, #-56 // =0xffffffffffffffc8
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: add x10, sp, #120
+; CHECK-NEXT: movk x8, #65424, lsl #32
+; CHECK-NEXT: add x9, x9, #112
+; CHECK-NEXT: stp x2, x3, [sp, #128]
+; CHECK-NEXT: stp x9, x8, [sp, #192]
+; CHECK-NEXT: add x8, x10, #56
+; CHECK-NEXT: add x9, sp, #208
+; CHECK-NEXT: str x1, [sp, #120]
+; CHECK-NEXT: stp x4, x5, [sp, #144]
+; CHECK-NEXT: stp x6, x7, [sp, #160]
+; CHECK-NEXT: stp q1, q2, [sp]
+; CHECK-NEXT: stp q3, q4, [sp, #32]
+; CHECK-NEXT: stp q5, q6, [sp, #64]
+; CHECK-NEXT: str q7, [sp, #96]
+; CHECK-NEXT: stp x9, x8, [sp, #176]
+; CHECK-NEXT: add sp, sp, #208
+; CHECK-NEXT: ret
+;
+; GISEL-LABEL: va_used:
+; GISEL: // %bb.0:
+; GISEL-NEXT: sub sp, sp, #208
+; GISEL-NEXT: mov x8, sp
+; GISEL-NEXT: add x9, sp, #208
+; GISEL-NEXT: add x10, sp, #208
+; GISEL-NEXT: stp x9, x10, [x8]
+; GISEL-NEXT: add x9, sp, #144
+; GISEL-NEXT: mov w10, #-112 // =0xffffff90
+; GISEL-NEXT: str x9, [x8, #16]
+; GISEL-NEXT: mov w9, #-56 // =0xffffffc8
+; GISEL-NEXT: stp x1, x2, [sp, #152]
+; GISEL-NEXT: stp x3, x4, [sp, #168]
+; GISEL-NEXT: stp x5, x6, [sp, #184]
+; GISEL-NEXT: str x7, [sp, #200]
+; GISEL-NEXT: stp q1, q2, [sp, #32]
+; GISEL-NEXT: stp q3, q4, [sp, #64]
+; GISEL-NEXT: stp q5, q6, [sp, #96]
+; GISEL-NEXT: str q7, [sp, #128]
+; GISEL-NEXT: stp w9, w10, [x8, #24]
+; GISEL-NEXT: add sp, sp, #208
+; GISEL-NEXT: ret
+ %valist = alloca %struct.__va_list
+ call void @llvm.va_start(ptr %valist)
+ call void @llvm.va_end(ptr %valist)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/alloca.ll b/llvm/test/CodeGen/AArch64/alloca.ll
index ca3a500d79f32..119b84e352c74 100644
--- a/llvm/test/CodeGen/AArch64/alloca.ll
+++ b/llvm/test/CodeGen/AArch64/alloca.ll
@@ -62,6 +62,8 @@ define i64 @test_alloca_with_local(i64 %n) {
; CHECK: ret
}
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
define void @test_variadic_alloca(i64 %n, ...) {
; CHECK-LABEL: test_variadic_alloca:
@@ -77,31 +79,33 @@ define void @test_variadic_alloca(i64 %n, ...) {
; CHECK: stp x29, x30, [sp, #-16]!
; CHECK: mov x29, sp
-; CHECK: sub sp, sp, #192
-; CHECK-DAG: stp q6, q7, [x29, #-96]
+; CHECK: sub sp, sp, #224
+; CHECK-DAG: stp q6, q7, [x29, #-128]
; [...]
-; CHECK-DAG: stp q0, q1, [x29, #-192]
+; CHECK-DAG: stp q2, q3, [x29, #-192]
-; CHECK-DAG: stp x5, x6, [x29, #-24]
+; CHECK-DAG: stp x5, x6, [x29, #-56]
; [...]
-; CHECK-DAG: stp x1, x2, [x29, #-56]
+; CHECK-DAG: stp x1, x2, [x29, #-88]
; CHECK-NOFP-ARM64: stp x29, x30, [sp, #-16]!
; CHECK-NOFP-ARM64: mov x29, sp
-; CHECK-NOFP-ARM64: sub sp, sp, #64
-; CHECK-NOFP-ARM64-DAG: stp x5, x6, [x29, #-24]
+; CHECK-NOFP-ARM64: sub sp, sp, #16
+; CHECK-NOFP-ARM64-DAG: stp x5, x6, [x29, #-56]
; [...]
-; CHECK-NOFP-ARM64-DAG: stp x3, x4, [x29, #-40]
+; CHECK-NOFP-ARM64-DAG: stp x3, x4, [x29, #-72]
; [...]
-; CHECK-NOFP-ARM64-DAG: stp x1, x2, [x29, #-56]
+; CHECK-NOFP-ARM64-DAG: stp x1, x2, [x29, #-88]
; [...]
; CHECK-NOFP-ARM64: mov x8, sp
-
+ %valist = alloca %struct.__va_list
+ call void @llvm.va_start(ptr %valist)
%addr = alloca i8, i64 %n
call void @use_addr(ptr %addr)
; CHECK: bl use_addr
+ call void @llvm.va_end(ptr %valist)
ret void
; CHECK-NOFP-AARCH64: sub sp, x29, #64
diff --git a/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll b/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll
index 5d3c755d0d73d..da176894c48a9 100644
--- a/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll
+++ b/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll
@@ -8,8 +8,8 @@
; CHECK-LABEL: _tailTest:
; CHECK: b __ZN1C3addEPKcz
; CHECK-LABEL: __ZThn8_N1C1fEiiiiiiiiiz:
-; CHECK: ldr w9, [sp, #4]
-; CHECK: str w9, [sp, #4]
+; CHECK: ldr w8, [sp, #4]
+; CHECK: str w8, [sp, #4]
; CHECK: b __ZN1C1fEiiiiiiiiiz
%class.C = type { %class.A.base, [4 x i8], %class.B.base, [4 x i8] }
diff --git a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
index 812837639196e..7ee76c8ad50a2 100644
--- a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
+++ b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
@@ -8,18 +8,22 @@ target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"
%class.X = type { i8 }
%struct.B = type { ptr }
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
$"??_9B@@$BA@AA" = comdat any
; Function Attrs: noinline optnone
define linkonce_odr void @"??_9B@@$BA@AA"(ptr %this, ...) #1 comdat align 2 {
entry:
+ %valist = alloca %struct.__va_list
+ call void @llvm.va_start.p0(ptr %valist)
%this.addr = alloca ptr, align 8
store ptr %this, ptr %this.addr, align 8
%this1 = load ptr, ptr %this.addr, align 8
call void asm sideeffect "", "~{d0}"()
%vtable = load ptr, ptr %this1, align 8
%0 = load ptr, ptr %vtable, align 8
+ call void @llvm.va_end.p0(ptr %valist)
musttail call void (ptr, ...) %0(ptr %this1, ...)
ret void
; No predecessors!
@@ -36,7 +40,9 @@ attributes #1 = { noinline optnone "thunk" }
; CHECK-EC: mov v7.16b, v0.16b
; CHECK-EC: ldr x9, [x0]
; CHECK-EC: ldr x11, [x9]
+; CHECH-EC: add x4, sp, #96
; CHECK-EC: mov v0.16b, v7.16b
-; CHECK-EC: add x4, sp, #64
-; CHECK-EC: add sp, sp, #64
+; CHECK-EC: add x4, sp, #96
+; CHECK-EC: ldr x30, [sp, #48]
+; CHECK-EC: add sp, sp, #96
; CHECK-EC: br x11
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The win/arm64 exclusion lgtm.
✅ With the latest revision this PR passed the C/C++ code formatter. |
gentle ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, though we only require one approval, so it would have been fine to merge this after @zmodem's approval last week.
Thanks! |
This reapplies original commit e122483
Second try, but with win64 removed from the scope, since it was somehow broken by original commit.
Original PR description
If vaargs are not used there is no need to save them. LLVM already implements such optimization for x86, as well as gcc [1].
Some ABI tests are kept almost as-is, except for stack offsets, by just adding llvm.va_start. Only laapcs_vararg_frame.ll test was rewritten to match new behavior.
[1] https://godbolt.org/z/GWWKr8xMd