-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[llvm][X86] Fix merging of large sp updates #125007
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: None (macurtis-amd) ChangesFull diff: https://github.com/llvm/llvm-project/pull/125007.diff 4 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 78db8413e62c9b..ad8c02c1f0d999 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -284,7 +284,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
// Adjust stack pointer.
int StackAdj = StackAdjust.getImm();
int MaxTCDelta = X86FI->getTCReturnAddrDelta();
- int Offset = 0;
+ int64_t Offset = 0;
assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
// Incoporate the retaddr area.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index f8ed75f189a776..9d89bfc7273ff2 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -391,9 +391,9 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
return MI;
}
-int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- bool doMergeWithPrevious) const {
+int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ bool doMergeWithPrevious) const {
if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
(!doMergeWithPrevious && MBBI == MBB.end()))
return 0;
@@ -415,27 +415,38 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
PI = std::prev(PI);
- unsigned Opc = PI->getOpcode();
- int Offset = 0;
-
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
- PI->getOperand(0).getReg() == StackPtr) {
- assert(PI->getOperand(1).getReg() == StackPtr);
- Offset = PI->getOperand(2).getImm();
- } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
- PI->getOperand(0).getReg() == StackPtr &&
- PI->getOperand(1).getReg() == StackPtr &&
- PI->getOperand(2).getImm() == 1 &&
- PI->getOperand(3).getReg() == X86::NoRegister &&
- PI->getOperand(5).getReg() == X86::NoRegister) {
- // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
- Offset = PI->getOperand(4).getImm();
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
- PI->getOperand(0).getReg() == StackPtr) {
- assert(PI->getOperand(1).getReg() == StackPtr);
- Offset = -PI->getOperand(2).getImm();
- } else
- return 0;
+ int64_t Offset = 0;
+ for (;;) {
+ unsigned Opc = PI->getOpcode();
+
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ assert(PI->getOperand(1).getReg() == StackPtr);
+ Offset = PI->getOperand(2).getImm();
+ } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
+ PI->getOperand(0).getReg() == StackPtr &&
+ PI->getOperand(1).getReg() == StackPtr &&
+ PI->getOperand(2).getImm() == 1 &&
+ PI->getOperand(3).getReg() == X86::NoRegister &&
+ PI->getOperand(5).getReg() == X86::NoRegister) {
+ // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
+ Offset = PI->getOperand(4).getImm();
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ assert(PI->getOperand(1).getReg() == StackPtr);
+ Offset = -PI->getOperand(2).getImm();
+ } else
+ return 0;
+
+ constexpr int64_t Chunk = (1LL << 31) - 1;
+ if (Offset < Chunk)
+ break;
+
+ if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end()))
+ return 0;
+
+ PI = doMergeWithPrevious ? std::prev(PI) : std::next(PI);
+ }
PI = MBB.erase(PI);
if (PI != MBB.end() && PI->isCFIInstruction()) {
@@ -2457,7 +2468,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (HasFP) {
if (X86FI->hasSwiftAsyncContext()) {
// Discard the context.
- int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
+ int64_t Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
}
// Pop EBP.
@@ -2618,7 +2629,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
// Add the return addr area delta back since we are not tail calling.
- int Offset = -1 * X86FI->getTCReturnAddrDelta();
+ int64_t Offset = -1 * X86FI->getTCReturnAddrDelta();
assert(Offset >= 0 && "TCDelta should never be positive");
if (Offset) {
// Check for possible merge with preceding ADD instruction.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 02fe8ee02a7e45..79d8ea1b217694 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -138,8 +138,9 @@ class X86FrameLowering : public TargetFrameLowering {
/// it is an ADD/SUB/LEA instruction it is deleted argument and the
/// stack adjustment is returned as a positive value for ADD/LEA and
/// a negative for SUB.
- int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- bool doMergeWithPrevious) const;
+ int64_t mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ bool doMergeWithPrevious) const;
/// Emit a series of instructions to increment / decrement the stack
/// pointer by a constant value.
diff --git a/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll b/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll
new file mode 100644
index 00000000000000..4828a7595bbcf9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -O3 -mtriple=x86_64-linux-unknown -verify-machineinstrs -o %t.s
+; RUN: FileCheck --input-file=%t.s %s
+
+; Check that the stack update after calling bar gets merged into the second add
+; and not the first which is already at the chunk size limit (0x7FFFFFFF).
+
+define void @foo(ptr %rhs) {
+; CHECK-LABEL: foo
+entry:
+ %lhs = alloca [5 x [5 x [3 x [162 x [161 x [161 x double]]]]]], align 16
+ store ptr %lhs, ptr %rhs, align 8
+ %0 = call i32 @baz()
+ call void @bar(i64 0, i64 0, i64 0, i64 0, i64 0, ptr null, ptr %rhs, ptr null, ptr %rhs)
+; CHECK: call{{.*}}bar
+; CHECK: addq{{.*}}$2147483647, %rsp
+; CHECK: addq{{.*}}$372037585, %rsp
+ ret void
+}
+
+declare void @bar(i64, i64, i64, i64, i64, ptr, ptr, ptr, ptr)
+
+declare i32 @baz()
+
+
+; RUN: llvm-mc -triple x86_64-unknown-unknown %t.s
|
7914b85
to
be00a11
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm, would like someone else to review as well
declare i32 @baz() | ||
|
||
|
||
; RUN: llvm-mc -triple x86_64-unknown-unknown %t.s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's this checking for? Why don't put it in the beginning of the file?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@phoebewang Thanks for taking a look at this.
What's this checking for?
Added comment with rationale:
; Double-check that we are able to assemble the generated '.s'. A symptom of the
; problem that led to this test is an assembler failure when using
; '-save-temps'. For example:
;
; > ...s:683:7: error: invalid operand for instruction
; > addq $2147483679, %rsp # imm = 0x8000001F
;
; RUN: llvm-mc -triple x86_64-unknown-unknown %t.s
Why don't put it in the beginning of the file?
Moved to to top of the file.
be00a11
to
83bb569
Compare
call void @bar(i64 0, i64 0, i64 0, i64 0, i64 0, ptr null, ptr %rhs, ptr null, ptr %rhs) | ||
; CHECK: call{{.*}}bar | ||
; CHECK: addq{{.*}}$2147483647, %rsp | ||
; CHECK: addq{{.*}}$372037585, %rsp |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I compared the assembly before and after this patch, but the CFI directive doesn't look correct to me after this patch.
Before:
addq $2147483679, %rsp # imm = 0x8000001F
.cfi_adjust_cfa_offset -2147483679
addq $372037553, %rsp # imm = 0x162CD7B1
.cfi_def_cfa_offset 16
popq %rbx
.cfi_def_cfa_offset 8
retq
After:
addq $2147483647, %rsp # imm = 0x7FFFFFFF
addq $372037585, %rsp # imm = 0x162CD7D1
.cfi_adjust_cfa_offset -372037585
popq %rbx
.cfi_def_cfa_offset 8
retq
Can you take a look please?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Latest version updates .cfi_adjust_cfa_offset
to account for total stack adjustment:
addq $2147483647, %rsp # imm = 0x7FFFFFFF
addq $372037585, %rsp # imm = 0x162CD7D1
.cfi_adjust_cfa_offset -2519521232
popq %rbx
.cfi_def_cfa_offset 8
retq
My understanding is limited, but the .cfi_adjust_cfa_offset
before my change seems incorrect in that it does not account for the following addq.
83bb569
to
c9dc643
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
ba28547
to
0b83065
Compare
In cases where `emitSPUpdate` produced multiple adds: ``` call foo add 0x7FFFFFFF <--chunk size add ... ``` `mergeSPUpdates` would incorrectly adjust the offset of the first add producing an invalid immediate value. This change teaches `mergeSPUpdates` to look for a subsequent add if updating the current one would exceed the chunk size. @phoebewang @mconst
Silence compiler warning introduced in llvm#125007 - assign the address delta to int64_t, assert it is negative and negate it only as part of the mergeSPAdd call Fixes llvm#125825
…m#127185) Silence compiler warning introduced in llvm#125007 - assign the address delta to int64_t, assert it is negative and negate it only as part of the mergeSPAdd call Fixes llvm#125825
In cases where `emitSPUpdate` produced multiple adds: ``` call foo add 0x7FFFFFFF <--chunk size add ... ``` `mergeSPUpdates` would incorrectly adjust the offset of the first add producing an invalid immediate value. This change teaches `mergeSPUpdates` to look for a subsequent add if updating the current one would exceed the chunk size. @phoebewang @mconst
In cases where
emitSPUpdate
produced multiple adds:mergeSPUpdates
would incorrectly adjust the offset of the first add producing an invalid immediate value.This change teaches
mergeSPUpdates
to look for a subsequent add if updating the current one would exceed the chunk size.@phoebewang @mconst