[llvm][X86] Fix merging of large sp updates #125007

macurtis-amd · 2025-01-30T01:05:24Z

In cases where emitSPUpdate produced multiple adds:

call foo
add 0x7FFFFFFF   <--chunk size
add ...

mergeSPUpdates would incorrectly adjust the offset of the first add producing an invalid immediate value.

This change teaches mergeSPUpdates to look for a subsequent add if updating the current one would exceed the chunk size.

@phoebewang @mconst

llvmbot · 2025-01-30T01:05:58Z

@llvm/pr-subscribers-backend-x86

Author: None (macurtis-amd)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/125007.diff

4 Files Affected:

(modified) llvm/lib/Target/X86/X86ExpandPseudo.cpp (+1-1)
(modified) llvm/lib/Target/X86/X86FrameLowering.cpp (+37-26)
(modified) llvm/lib/Target/X86/X86FrameLowering.h (+3-2)
(added) llvm/test/CodeGen/X86/merge-huge-sp-updates.ll (+25)

diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 78db8413e62c9b..ad8c02c1f0d999 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -284,7 +284,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
     // Adjust stack pointer.
     int StackAdj = StackAdjust.getImm();
     int MaxTCDelta = X86FI->getTCReturnAddrDelta();
-    int Offset = 0;
+    int64_t Offset = 0;
     assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
 
     // Incoporate the retaddr area.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index f8ed75f189a776..9d89bfc7273ff2 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -391,9 +391,9 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
   return MI;
 }
 
-int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator &MBBI,
-                                     bool doMergeWithPrevious) const {
+int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator &MBBI,
+                                         bool doMergeWithPrevious) const {
   if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
       (!doMergeWithPrevious && MBBI == MBB.end()))
     return 0;
@@ -415,27 +415,38 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
   if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
     PI = std::prev(PI);
 
-  unsigned Opc = PI->getOpcode();
-  int Offset = 0;
-
-  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
-      PI->getOperand(0).getReg() == StackPtr) {
-    assert(PI->getOperand(1).getReg() == StackPtr);
-    Offset = PI->getOperand(2).getImm();
-  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
-             PI->getOperand(0).getReg() == StackPtr &&
-             PI->getOperand(1).getReg() == StackPtr &&
-             PI->getOperand(2).getImm() == 1 &&
-             PI->getOperand(3).getReg() == X86::NoRegister &&
-             PI->getOperand(5).getReg() == X86::NoRegister) {
-    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
-    Offset = PI->getOperand(4).getImm();
-  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
-             PI->getOperand(0).getReg() == StackPtr) {
-    assert(PI->getOperand(1).getReg() == StackPtr);
-    Offset = -PI->getOperand(2).getImm();
-  } else
-    return 0;
+  int64_t Offset = 0;
+  for (;;) {
+    unsigned Opc = PI->getOpcode();
+
+    if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
+        PI->getOperand(0).getReg() == StackPtr) {
+      assert(PI->getOperand(1).getReg() == StackPtr);
+      Offset = PI->getOperand(2).getImm();
+    } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
+               PI->getOperand(0).getReg() == StackPtr &&
+               PI->getOperand(1).getReg() == StackPtr &&
+               PI->getOperand(2).getImm() == 1 &&
+               PI->getOperand(3).getReg() == X86::NoRegister &&
+               PI->getOperand(5).getReg() == X86::NoRegister) {
+      // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
+      Offset = PI->getOperand(4).getImm();
+    } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
+               PI->getOperand(0).getReg() == StackPtr) {
+      assert(PI->getOperand(1).getReg() == StackPtr);
+      Offset = -PI->getOperand(2).getImm();
+    } else
+      return 0;
+
+    constexpr int64_t Chunk = (1LL << 31) - 1;
+    if (Offset < Chunk)
+      break;
+
+    if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end()))
+      return 0;
+
+    PI = doMergeWithPrevious ? std::prev(PI) : std::next(PI);
+  }
 
   PI = MBB.erase(PI);
   if (PI != MBB.end() && PI->isCFIInstruction()) {
@@ -2457,7 +2468,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
   if (HasFP) {
     if (X86FI->hasSwiftAsyncContext()) {
       // Discard the context.
-      int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
+      int64_t Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
       emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
     }
     // Pop EBP.
@@ -2618,7 +2629,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
 
   if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
     // Add the return addr area delta back since we are not tail calling.
-    int Offset = -1 * X86FI->getTCReturnAddrDelta();
+    int64_t Offset = -1 * X86FI->getTCReturnAddrDelta();
     assert(Offset >= 0 && "TCDelta should never be positive");
     if (Offset) {
       // Check for possible merge with preceding ADD instruction.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 02fe8ee02a7e45..79d8ea1b217694 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -138,8 +138,9 @@ class X86FrameLowering : public TargetFrameLowering {
   /// it is an ADD/SUB/LEA instruction it is deleted argument and the
   /// stack adjustment is returned as a positive value for ADD/LEA and
   /// a negative for SUB.
-  int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
-                     bool doMergeWithPrevious) const;
+  int64_t mergeSPUpdates(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &MBBI,
+                         bool doMergeWithPrevious) const;
 
   /// Emit a series of instructions to increment / decrement the stack
   /// pointer by a constant value.
diff --git a/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll b/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll
new file mode 100644
index 00000000000000..4828a7595bbcf9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -O3 -mtriple=x86_64-linux-unknown -verify-machineinstrs -o %t.s
+; RUN: FileCheck --input-file=%t.s %s
+
+; Check that the stack update after calling bar gets merged into the second add
+; and not the first which is already at the chunk size limit (0x7FFFFFFF).
+
+define void @foo(ptr %rhs) {
+; CHECK-LABEL: foo
+entry:
+  %lhs = alloca [5 x [5 x [3 x [162 x [161 x [161 x double]]]]]], align 16
+  store ptr %lhs, ptr %rhs, align 8
+  %0 = call i32 @baz()
+  call void @bar(i64 0, i64 0, i64 0, i64 0, i64 0, ptr null, ptr %rhs, ptr null, ptr %rhs)
+; CHECK: call{{.*}}bar
+; CHECK: addq{{.*}}$2147483647, %rsp
+; CHECK: addq{{.*}}$372037585, %rsp
+  ret void
+}
+
+declare void @bar(i64, i64, i64, i64, i64, ptr, ptr, ptr, ptr)
+
+declare i32 @baz()
+
+
+; RUN: llvm-mc -triple x86_64-unknown-unknown %t.s

llvm/lib/Target/X86/X86FrameLowering.cpp

ronlieb

lgtm, would like someone else to review as well

phoebewang · 2025-01-31T11:48:40Z

llvm/test/CodeGen/X86/merge-huge-sp-updates.ll

+declare i32 @baz()
+
+
+; RUN: llvm-mc -triple x86_64-unknown-unknown %t.s


What's this checking for? Why don't put it in the beginning of the file?

@phoebewang Thanks for taking a look at this.

What's this checking for?

Added comment with rationale:

; Double-check that we are able to assemble the generated '.s'. A symptom of the ; problem that led to this test is an assembler failure when using ; '-save-temps'. For example: ; ; > ...s:683:7: error: invalid operand for instruction ; > addq $2147483679, %rsp # imm = 0x8000001F ; ; RUN: llvm-mc -triple x86_64-unknown-unknown %t.s

Why don't put it in the beginning of the file?

Moved to to top of the file.

phoebewang · 2025-02-01T12:58:07Z

llvm/test/CodeGen/X86/merge-huge-sp-updates.ll

+  call void @bar(i64 0, i64 0, i64 0, i64 0, i64 0, ptr null, ptr %rhs, ptr null, ptr %rhs)
+; CHECK: call{{.*}}bar
+; CHECK: addq{{.*}}$2147483647, %rsp
+; CHECK: addq{{.*}}$372037585, %rsp


I compared the assembly before and after this patch, but the CFI directive doesn't look correct to me after this patch.

Before:

addq $2147483679, %rsp # imm = 0x8000001F .cfi_adjust_cfa_offset -2147483679 addq $372037553, %rsp # imm = 0x162CD7B1 .cfi_def_cfa_offset 16 popq %rbx .cfi_def_cfa_offset 8 retq

After:

addq $2147483647, %rsp # imm = 0x7FFFFFFF addq $372037585, %rsp # imm = 0x162CD7D1 .cfi_adjust_cfa_offset -372037585 popq %rbx .cfi_def_cfa_offset 8 retq

Can you take a look please?

Latest version updates .cfi_adjust_cfa_offset to account for total stack adjustment:

addq $2147483647, %rsp # imm = 0x7FFFFFFF addq $372037585, %rsp # imm = 0x162CD7D1 .cfi_adjust_cfa_offset -2519521232 popq %rbx .cfi_def_cfa_offset 8 retq

My understanding is limited, but the .cfi_adjust_cfa_offset before my change seems incorrect in that it does not account for the following addq.

llvm/test/CodeGen/X86/merge-huge-sp-updates.ll

phoebewang

LGTM, thanks!

@phoebewang

In cases where `emitSPUpdate` produced multiple adds: ``` call foo add 0x7FFFFFFF <--chunk size add ... ``` `mergeSPUpdates` would incorrectly adjust the offset of the first add producing an invalid immediate value. This change teaches `mergeSPUpdates` to look for a subsequent add if updating the current one would exceed the chunk size. @phoebewang @mconst

Silence compiler warning introduced in llvm#125007 - assign the address delta to int64_t, assert it is negative and negate it only as part of the mergeSPAdd call Fixes llvm#125825

…7185) Silence compiler warning introduced in #125007 - assign the address delta to int64_t, assert it is negative and negate it only as part of the mergeSPAdd call Fixes #125825

…m#127185) Silence compiler warning introduced in llvm#125007 - assign the address delta to int64_t, assert it is negative and negate it only as part of the mergeSPAdd call Fixes llvm#125825

@phoebewang

In cases where `emitSPUpdate` produced multiple adds: ``` call foo add 0x7FFFFFFF <--chunk size add ... ``` `mergeSPUpdates` would incorrectly adjust the offset of the first add producing an invalid immediate value. This change teaches `mergeSPUpdates` to look for a subsequent add if updating the current one would exceed the chunk size. @phoebewang @mconst

llvmbot added the backend:X86 label Jan 30, 2025

RKSimon requested review from RKSimon, phoebewang and KanRobert January 30, 2025 08:45

ronlieb self-requested a review January 30, 2025 13:50

macurtis-amd commented Jan 30, 2025

View reviewed changes

llvm/lib/Target/X86/X86FrameLowering.cpp Outdated Show resolved Hide resolved

macurtis-amd force-pushed the fix-mrg-big-sp-update branch from 7914b85 to be00a11 Compare January 30, 2025 17:13

ronlieb approved these changes Jan 31, 2025

View reviewed changes

phoebewang reviewed Jan 31, 2025

View reviewed changes

macurtis-amd force-pushed the fix-mrg-big-sp-update branch from be00a11 to 83bb569 Compare January 31, 2025 15:23

phoebewang reviewed Feb 1, 2025

View reviewed changes

llvm/test/CodeGen/X86/merge-huge-sp-updates.ll Outdated Show resolved Hide resolved

macurtis-amd force-pushed the fix-mrg-big-sp-update branch from 83bb569 to c9dc643 Compare February 3, 2025 12:29

phoebewang approved these changes Feb 4, 2025

View reviewed changes

[llvm][X86] Fix merging of large sp updates

0b83065

macurtis-amd force-pushed the fix-mrg-big-sp-update branch from ba28547 to 0b83065 Compare February 4, 2025 11:36

macurtis-amd merged commit 69f202b into llvm:main Feb 4, 2025
8 checks passed

RKSimon mentioned this pull request Feb 14, 2025

[X86] emitEpilogue - silence implicit integer extension warning. #127185

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[llvm][X86] Fix merging of large sp updates #125007

[llvm][X86] Fix merging of large sp updates #125007

Uh oh!

macurtis-amd commented Jan 30, 2025 •

edited

Loading

Uh oh!

llvmbot commented Jan 30, 2025

Uh oh!

Uh oh!

ronlieb left a comment

Uh oh!

phoebewang Jan 31, 2025

Uh oh!

macurtis-amd Jan 31, 2025

Uh oh!

phoebewang Feb 1, 2025

Uh oh!

macurtis-amd Feb 3, 2025

Uh oh!

Uh oh!

phoebewang left a comment

Uh oh!

Uh oh!

Uh oh!

		declare i32 @baz()


		; RUN: llvm-mc -triple x86_64-unknown-unknown %t.s

[llvm][X86] Fix merging of large sp updates #125007

[llvm][X86] Fix merging of large sp updates #125007

Uh oh!

Conversation

macurtis-amd commented Jan 30, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jan 30, 2025

Uh oh!

Uh oh!

ronlieb left a comment

Choose a reason for hiding this comment

Uh oh!

phoebewang Jan 31, 2025

Choose a reason for hiding this comment

Uh oh!

macurtis-amd Jan 31, 2025

Choose a reason for hiding this comment

Uh oh!

phoebewang Feb 1, 2025

Choose a reason for hiding this comment

Uh oh!

macurtis-amd Feb 3, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

phoebewang left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

macurtis-amd commented Jan 30, 2025 •

edited

Loading