Skip to content

Reapply "[Aarch64] [ISel] Don't save vaargs registers if vaargs are unused" #131459

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 24, 2025

Conversation

pskrgag
Copy link
Contributor

@pskrgag pskrgag commented Mar 15, 2025

This reapplies original commit e122483

Second try, but with win64 removed from the scope, since it was somehow broken by original commit.

Original PR description

If vaargs are not used there is no need to save them. LLVM already implements such optimization for x86, as well as gcc [1].

Some ABI tests are kept almost as-is, except for stack offsets, by just adding llvm.va_start. Only laapcs_vararg_frame.ll test was rewritten to match new behavior.

[1] https://godbolt.org/z/GWWKr8xMd

@llvmbot
Copy link
Member

llvmbot commented Mar 15, 2025

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-globalisel

Author: Pavel Skripkin (pskrgag)

Changes

This reapplies original commit e122483

Second try, but with win64 removed from the scope, since it was somehow broken by original commit.

Original PR description

If vaargs are not used there is no need to save them. LLVM already implements such optimization for x86, as well as gcc [1].

Some ABI tests are kept almost as-is, except for stack offsets, by just adding llvm.va_start. Only laapcs_vararg_frame.ll test was rewritten to match new behavior.

[1] https://godbolt.org/z/GWWKr8xMd


Full diff: https://github.com/llvm/llvm-project/pull/131459.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+3-1)
  • (modified) llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll (+54-10)
  • (modified) llvm/test/CodeGen/AArch64/alloca.ll (+14-10)
  • (modified) llvm/test/CodeGen/AArch64/darwinpcs-tail.ll (+2-2)
  • (modified) llvm/test/CodeGen/AArch64/vararg-tallcall.ll (+8-2)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a9b4965e32b4c..059789cddc342 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8230,7 +8230,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
   }
 
   // varargs
-  if (isVarArg) {
+  // Note that IsWin64 part is required to prevent odd miscompilations on arm64 windows
+  // platforms. For more info refer to GH#126780 PR comments.
+  if (isVarArg && (DAG.getMachineFunction().getFrameInfo().hasVAStart() || IsWin64)) {
     if (!Subtarget->isTargetDarwin() || IsWin64) {
       // The AAPCS variadic function ABI is identical to the non-variadic
       // one. As a result there may be more arguments in registers and we should
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll b/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll
index 7892e892a2412..6ea520ddcd6cd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll
@@ -2,19 +2,11 @@
 ; RUN: llc < %s --global-isel=0 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s
 ; RUN: llc < %s --global-isel=1 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s --check-prefix=GISEL
 
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
 define void @va(i32 %count, half %f, ...) nounwind {
 ; CHECK-LABEL: va:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sub sp, sp, #176
-; CHECK-NEXT:    stp x2, x3, [sp, #128]
-; CHECK-NEXT:    str x1, [sp, #120]
-; CHECK-NEXT:    stp x4, x5, [sp, #144]
-; CHECK-NEXT:    stp x6, x7, [sp, #160]
-; CHECK-NEXT:    stp q1, q2, [sp]
-; CHECK-NEXT:    stp q3, q4, [sp, #32]
-; CHECK-NEXT:    stp q5, q6, [sp, #64]
-; CHECK-NEXT:    str q7, [sp, #96]
-; CHECK-NEXT:    add sp, sp, #176
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: va:
@@ -33,3 +25,55 @@ define void @va(i32 %count, half %f, ...) nounwind {
 entry:
   ret void
 }
+
+define void @va_used(i32 %count, half %f, ...) nounwind {
+; CHECK-LABEL: va_used:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #208
+; CHECK-NEXT:    mov x8, #-56 // =0xffffffffffffffc8
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    add x10, sp, #120
+; CHECK-NEXT:    movk x8, #65424, lsl #32
+; CHECK-NEXT:    add x9, x9, #112
+; CHECK-NEXT:    stp x2, x3, [sp, #128]
+; CHECK-NEXT:    stp x9, x8, [sp, #192]
+; CHECK-NEXT:    add x8, x10, #56
+; CHECK-NEXT:    add x9, sp, #208
+; CHECK-NEXT:    str x1, [sp, #120]
+; CHECK-NEXT:    stp x4, x5, [sp, #144]
+; CHECK-NEXT:    stp x6, x7, [sp, #160]
+; CHECK-NEXT:    stp q1, q2, [sp]
+; CHECK-NEXT:    stp q3, q4, [sp, #32]
+; CHECK-NEXT:    stp q5, q6, [sp, #64]
+; CHECK-NEXT:    str q7, [sp, #96]
+; CHECK-NEXT:    stp x9, x8, [sp, #176]
+; CHECK-NEXT:    add sp, sp, #208
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: va_used:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    sub sp, sp, #208
+; GISEL-NEXT:    mov x8, sp
+; GISEL-NEXT:    add x9, sp, #208
+; GISEL-NEXT:    add x10, sp, #208
+; GISEL-NEXT:    stp x9, x10, [x8]
+; GISEL-NEXT:    add x9, sp, #144
+; GISEL-NEXT:    mov w10, #-112 // =0xffffff90
+; GISEL-NEXT:    str x9, [x8, #16]
+; GISEL-NEXT:    mov w9, #-56 // =0xffffffc8
+; GISEL-NEXT:    stp x1, x2, [sp, #152]
+; GISEL-NEXT:    stp x3, x4, [sp, #168]
+; GISEL-NEXT:    stp x5, x6, [sp, #184]
+; GISEL-NEXT:    str x7, [sp, #200]
+; GISEL-NEXT:    stp q1, q2, [sp, #32]
+; GISEL-NEXT:    stp q3, q4, [sp, #64]
+; GISEL-NEXT:    stp q5, q6, [sp, #96]
+; GISEL-NEXT:    str q7, [sp, #128]
+; GISEL-NEXT:    stp w9, w10, [x8, #24]
+; GISEL-NEXT:    add sp, sp, #208
+; GISEL-NEXT:    ret
+  %valist = alloca %struct.__va_list
+  call void @llvm.va_start(ptr %valist)
+  call void @llvm.va_end(ptr %valist)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/alloca.ll b/llvm/test/CodeGen/AArch64/alloca.ll
index ca3a500d79f32..119b84e352c74 100644
--- a/llvm/test/CodeGen/AArch64/alloca.ll
+++ b/llvm/test/CodeGen/AArch64/alloca.ll
@@ -62,6 +62,8 @@ define i64 @test_alloca_with_local(i64 %n) {
 ; CHECK: ret
 }
 
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
 define void @test_variadic_alloca(i64 %n, ...) {
 ; CHECK-LABEL: test_variadic_alloca:
 
@@ -77,31 +79,33 @@ define void @test_variadic_alloca(i64 %n, ...) {
 
 ; CHECK: stp     x29, x30, [sp, #-16]!
 ; CHECK: mov     x29, sp
-; CHECK: sub     sp, sp, #192
-; CHECK-DAG: stp     q6, q7, [x29, #-96]
+; CHECK: sub     sp, sp, #224
+; CHECK-DAG: stp     q6, q7, [x29, #-128]
 ; [...]
-; CHECK-DAG: stp     q0, q1, [x29, #-192]
+; CHECK-DAG: stp     q2, q3, [x29, #-192]
 
-; CHECK-DAG: stp     x5, x6, [x29, #-24]
+; CHECK-DAG: stp     x5, x6, [x29, #-56]
 ; [...]
-; CHECK-DAG: stp     x1, x2, [x29, #-56]
+; CHECK-DAG: stp     x1, x2, [x29, #-88]
 
 ; CHECK-NOFP-ARM64: stp     x29, x30, [sp, #-16]!
 ; CHECK-NOFP-ARM64: mov     x29, sp
-; CHECK-NOFP-ARM64: sub     sp, sp, #64
-; CHECK-NOFP-ARM64-DAG: stp     x5, x6, [x29, #-24]
+; CHECK-NOFP-ARM64: sub     sp, sp, #16
+; CHECK-NOFP-ARM64-DAG: stp     x5, x6, [x29, #-56]
 ; [...]
-; CHECK-NOFP-ARM64-DAG: stp     x3, x4, [x29, #-40]
+; CHECK-NOFP-ARM64-DAG: stp     x3, x4, [x29, #-72]
 ; [...]
-; CHECK-NOFP-ARM64-DAG: stp     x1, x2, [x29, #-56]
+; CHECK-NOFP-ARM64-DAG: stp     x1, x2, [x29, #-88]
 ; [...]
 ; CHECK-NOFP-ARM64: mov     x8, sp
-
+  %valist = alloca %struct.__va_list
+  call void @llvm.va_start(ptr %valist)
   %addr = alloca i8, i64 %n
 
   call void @use_addr(ptr %addr)
 ; CHECK: bl use_addr
 
+  call void @llvm.va_end(ptr %valist)
   ret void
 
 ; CHECK-NOFP-AARCH64: sub sp, x29, #64
diff --git a/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll b/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll
index 5d3c755d0d73d..da176894c48a9 100644
--- a/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll
+++ b/llvm/test/CodeGen/AArch64/darwinpcs-tail.ll
@@ -8,8 +8,8 @@
 ; CHECK-LABEL: _tailTest:
 ; CHECK:       b __ZN1C3addEPKcz
 ; CHECK-LABEL: __ZThn8_N1C1fEiiiiiiiiiz:
-; CHECK:       ldr     w9, [sp, #4]
-; CHECK:       str     w9, [sp, #4]
+; CHECK:       ldr     w8, [sp, #4]
+; CHECK:       str     w8, [sp, #4]
 ; CHECK:       b __ZN1C1fEiiiiiiiiiz
 
 %class.C = type { %class.A.base, [4 x i8], %class.B.base, [4 x i8] }
diff --git a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
index 812837639196e..7ee76c8ad50a2 100644
--- a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
+++ b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
@@ -8,18 +8,22 @@ target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"
 
 %class.X = type { i8 }
 %struct.B = type { ptr }
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
 
 $"??_9B@@$BA@AA" = comdat any
 
 ; Function Attrs: noinline optnone
 define linkonce_odr void @"??_9B@@$BA@AA"(ptr %this, ...) #1 comdat align 2  {
 entry:
+  %valist = alloca %struct.__va_list
+  call void @llvm.va_start.p0(ptr %valist)
   %this.addr = alloca ptr, align 8
   store ptr %this, ptr %this.addr, align 8
   %this1 = load ptr, ptr %this.addr, align 8
   call void asm sideeffect "", "~{d0}"()
   %vtable = load ptr, ptr %this1, align 8
   %0 = load ptr, ptr %vtable, align 8
+  call void @llvm.va_end.p0(ptr %valist)
   musttail call void (ptr, ...) %0(ptr %this1, ...)
   ret void
                                                   ; No predecessors!
@@ -36,7 +40,9 @@ attributes #1 = { noinline optnone "thunk" }
 ; CHECK-EC: mov     v7.16b, v0.16b
 ; CHECK-EC: ldr     x9, [x0]
 ; CHECK-EC: ldr     x11, [x9]
+; CHECH-EC: add     x4, sp, #96
 ; CHECK-EC: mov     v0.16b, v7.16b
-; CHECK-EC: add     x4, sp, #64
-; CHECK-EC: add     sp, sp, #64
+; CHECK-EC: add     x4, sp, #96
+; CHECK-EC: ldr     x30, [sp, #48]
+; CHECK-EC: add     sp, sp, #96
 ; CHECK-EC: br      x11

Copy link
Collaborator

@zmodem zmodem left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The win/arm64 exclusion lgtm.

Copy link

github-actions bot commented Mar 19, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@pskrgag
Copy link
Contributor Author

pskrgag commented Mar 22, 2025

gentle ping

Copy link
Collaborator

@ostannard ostannard left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, though we only require one approval, so it would have been fine to merge this after @zmodem's approval last week.

@pskrgag pskrgag merged commit 0e63180 into llvm:main Mar 24, 2025
11 checks passed
@pskrgag
Copy link
Contributor Author

pskrgag commented Mar 24, 2025

Thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants