Skip to content

Commit 7914b85

Browse files
committed
[llvm][X86] Fix merging of large sp updates
1 parent cdc09a1 commit 7914b85

File tree

4 files changed

+66
-29
lines changed

4 files changed

+66
-29
lines changed

llvm/lib/Target/X86/X86ExpandPseudo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
284284
// Adjust stack pointer.
285285
int StackAdj = StackAdjust.getImm();
286286
int MaxTCDelta = X86FI->getTCReturnAddrDelta();
287-
int Offset = 0;
287+
int64_t Offset = 0;
288288
assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
289289

290290
// Incorporate the retaddr area.

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -391,9 +391,9 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
391391
return MI;
392392
}
393393

394-
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
395-
MachineBasicBlock::iterator &MBBI,
396-
bool doMergeWithPrevious) const {
394+
int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
395+
MachineBasicBlock::iterator &MBBI,
396+
bool doMergeWithPrevious) const {
397397
if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
398398
(!doMergeWithPrevious && MBBI == MBB.end()))
399399
return 0;
@@ -415,27 +415,38 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
415415
if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
416416
PI = std::prev(PI);
417417

418-
unsigned Opc = PI->getOpcode();
419-
int Offset = 0;
420-
421-
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
422-
PI->getOperand(0).getReg() == StackPtr) {
423-
assert(PI->getOperand(1).getReg() == StackPtr);
424-
Offset = PI->getOperand(2).getImm();
425-
} else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
426-
PI->getOperand(0).getReg() == StackPtr &&
427-
PI->getOperand(1).getReg() == StackPtr &&
428-
PI->getOperand(2).getImm() == 1 &&
429-
PI->getOperand(3).getReg() == X86::NoRegister &&
430-
PI->getOperand(5).getReg() == X86::NoRegister) {
431-
// For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
432-
Offset = PI->getOperand(4).getImm();
433-
} else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
434-
PI->getOperand(0).getReg() == StackPtr) {
435-
assert(PI->getOperand(1).getReg() == StackPtr);
436-
Offset = -PI->getOperand(2).getImm();
437-
} else
438-
return 0;
418+
int64_t Offset = 0;
419+
for (;;) {
420+
unsigned Opc = PI->getOpcode();
421+
422+
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
423+
PI->getOperand(0).getReg() == StackPtr) {
424+
assert(PI->getOperand(1).getReg() == StackPtr);
425+
Offset = PI->getOperand(2).getImm();
426+
} else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
427+
PI->getOperand(0).getReg() == StackPtr &&
428+
PI->getOperand(1).getReg() == StackPtr &&
429+
PI->getOperand(2).getImm() == 1 &&
430+
PI->getOperand(3).getReg() == X86::NoRegister &&
431+
PI->getOperand(5).getReg() == X86::NoRegister) {
432+
// For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
433+
Offset = PI->getOperand(4).getImm();
434+
} else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
435+
PI->getOperand(0).getReg() == StackPtr) {
436+
assert(PI->getOperand(1).getReg() == StackPtr);
437+
Offset = -PI->getOperand(2).getImm();
438+
} else
439+
return 0;
440+
441+
constexpr int64_t Chunk = (1LL << 31) - 1;
442+
if (Offset < Chunk)
443+
break;
444+
445+
if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end()))
446+
return 0;
447+
448+
PI = doMergeWithPrevious ? std::prev(PI) : std::next(PI);
449+
}
439450

440451
PI = MBB.erase(PI);
441452
if (PI != MBB.end() && PI->isCFIInstruction()) {
@@ -2457,7 +2468,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
24572468
if (HasFP) {
24582469
if (X86FI->hasSwiftAsyncContext()) {
24592470
// Discard the context.
2460-
int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2471+
int64_t Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
24612472
emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
24622473
}
24632474
// Pop EBP.
@@ -2618,7 +2629,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
26182629

26192630
if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
26202631
// Add the return addr area delta back since we are not tail calling.
2621-
int Offset = -1 * X86FI->getTCReturnAddrDelta();
2632+
int64_t Offset = -1 * X86FI->getTCReturnAddrDelta();
26222633
assert(Offset >= 0 && "TCDelta should never be positive");
26232634
if (Offset) {
26242635
// Check for possible merge with preceding ADD instruction.

llvm/lib/Target/X86/X86FrameLowering.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,9 @@ class X86FrameLowering : public TargetFrameLowering {
138138
/// it is an ADD/SUB/LEA instruction, it is deleted and the
139139
/// stack adjustment is returned as a positive value for ADD/LEA and
140140
/// a negative for SUB.
141-
int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
142-
bool doMergeWithPrevious) const;
141+
int64_t mergeSPUpdates(MachineBasicBlock &MBB,
142+
MachineBasicBlock::iterator &MBBI,
143+
bool doMergeWithPrevious) const;
143144

144145
/// Emit a series of instructions to increment / decrement the stack
145146
/// pointer by a constant value.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; RUN: llc < %s -O3 -mtriple=x86_64-linux-unknown -verify-machineinstrs -o %t.s
2+
; RUN: FileCheck --input-file=%t.s %s
3+
4+
; Check that the stack update after calling bar gets merged into the second add
5+
; and not the first which is already at the chunk size limit (0x7FFFFFFF).
6+
7+
define void @foo(ptr %rhs) {
8+
; CHECK-LABEL: foo
9+
entry:
10+
%lhs = alloca [5 x [5 x [3 x [162 x [161 x [161 x double]]]]]], align 16
11+
store ptr %lhs, ptr %rhs, align 8
12+
%0 = call i32 @baz()
13+
call void @bar(i64 0, i64 0, i64 0, i64 0, i64 0, ptr null, ptr %rhs, ptr null, ptr %rhs)
14+
; CHECK: call{{.*}}bar
15+
; CHECK: addq{{.*}}$2147483647, %rsp
16+
; CHECK: addq{{.*}}$372037585, %rsp
17+
ret void
18+
}
19+
20+
declare void @bar(i64, i64, i64, i64, i64, ptr, ptr, ptr, ptr)
21+
22+
declare i32 @baz()
23+
24+
25+
; RUN: llvm-mc -triple x86_64-unknown-unknown %t.s

0 commit comments

Comments
 (0)