Skip to content

Commit be00a11

Browse files
committed
[llvm][X86] Fix merging of large sp updates
1 parent e7e72a9 commit be00a11

File tree

4 files changed

+116
-43
lines changed

4 files changed

+116
-43
lines changed

llvm/lib/Target/X86/X86ExpandPseudo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
284284
// Adjust stack pointer.
285285
int StackAdj = StackAdjust.getImm();
286286
int MaxTCDelta = X86FI->getTCReturnAddrDelta();
287-
int Offset = 0;
287+
int64_t Offset = 0;
288288
assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
289289

290290
// Incoporate the retaddr area.
@@ -297,7 +297,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
297297

298298
if (Offset) {
299299
// Check for possible merge with preceding ADD instruction.
300-
Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
300+
Offset = X86FL->mergeSPAdd(MBB, MBBI, Offset, true);
301301
X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true);
302302
}
303303

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 60 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,8 @@ flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
223223
return false;
224224
}
225225

226+
constexpr int64_t MaxSPChunk = (1LL << 31) - 1;
227+
226228
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
227229
/// stack pointer by a constant value.
228230
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
@@ -242,7 +244,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
242244
return;
243245
}
244246

245-
uint64_t Chunk = (1LL << 31) - 1;
247+
uint64_t Chunk = MaxSPChunk;
246248

247249
MachineFunction &MF = *MBB.getParent();
248250
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
@@ -391,12 +393,14 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
391393
return MI;
392394
}
393395

394-
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
395-
MachineBasicBlock::iterator &MBBI,
396-
bool doMergeWithPrevious) const {
396+
template <typename T>
397+
int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
398+
MachineBasicBlock::iterator &MBBI,
399+
T CalcNewOffset,
400+
bool doMergeWithPrevious) const {
397401
if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
398402
(!doMergeWithPrevious && MBBI == MBB.end()))
399-
return 0;
403+
return CalcNewOffset(0);
400404

401405
MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
402406

@@ -415,27 +419,37 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
415419
if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
416420
PI = std::prev(PI);
417421

418-
unsigned Opc = PI->getOpcode();
419-
int Offset = 0;
420-
421-
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
422-
PI->getOperand(0).getReg() == StackPtr) {
423-
assert(PI->getOperand(1).getReg() == StackPtr);
424-
Offset = PI->getOperand(2).getImm();
425-
} else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
426-
PI->getOperand(0).getReg() == StackPtr &&
427-
PI->getOperand(1).getReg() == StackPtr &&
428-
PI->getOperand(2).getImm() == 1 &&
429-
PI->getOperand(3).getReg() == X86::NoRegister &&
430-
PI->getOperand(5).getReg() == X86::NoRegister) {
431-
// For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
432-
Offset = PI->getOperand(4).getImm();
433-
} else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
434-
PI->getOperand(0).getReg() == StackPtr) {
435-
assert(PI->getOperand(1).getReg() == StackPtr);
436-
Offset = -PI->getOperand(2).getImm();
437-
} else
438-
return 0;
422+
int64_t Offset = 0;
423+
for (;;) {
424+
unsigned Opc = PI->getOpcode();
425+
426+
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
427+
PI->getOperand(0).getReg() == StackPtr) {
428+
assert(PI->getOperand(1).getReg() == StackPtr);
429+
Offset = PI->getOperand(2).getImm();
430+
} else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
431+
PI->getOperand(0).getReg() == StackPtr &&
432+
PI->getOperand(1).getReg() == StackPtr &&
433+
PI->getOperand(2).getImm() == 1 &&
434+
PI->getOperand(3).getReg() == X86::NoRegister &&
435+
PI->getOperand(5).getReg() == X86::NoRegister) {
436+
// For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
437+
Offset = PI->getOperand(4).getImm();
438+
} else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
439+
PI->getOperand(0).getReg() == StackPtr) {
440+
assert(PI->getOperand(1).getReg() == StackPtr);
441+
Offset = -PI->getOperand(2).getImm();
442+
} else
443+
return CalcNewOffset(0);
444+
445+
if (std::abs((int64_t)CalcNewOffset(Offset)) < MaxSPChunk)
446+
break;
447+
448+
if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end()))
449+
return CalcNewOffset(0);
450+
451+
PI = doMergeWithPrevious ? std::prev(PI) : std::next(PI);
452+
}
439453

440454
PI = MBB.erase(PI);
441455
if (PI != MBB.end() && PI->isCFIInstruction()) {
@@ -448,7 +462,16 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
448462
if (!doMergeWithPrevious)
449463
MBBI = skipDebugInstructionsForward(PI, MBB.end());
450464

451-
return Offset;
465+
return CalcNewOffset(Offset);
466+
}
467+
468+
int64_t X86FrameLowering::mergeSPAdd(MachineBasicBlock &MBB,
469+
MachineBasicBlock::iterator &MBBI,
470+
int64_t AddOffset,
471+
bool doMergeWithPrevious) const {
472+
return mergeSPUpdates(
473+
MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; },
474+
doMergeWithPrevious);
452475
}
453476

454477
void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
@@ -1975,8 +1998,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
19751998

19761999
// If there is an SUB32ri of ESP immediately before this instruction, merge
19772000
// the two. This can be the case when tail call elimination is enabled and
1978-
// the callee has more arguments then the caller.
1979-
NumBytes -= mergeSPUpdates(MBB, MBBI, true);
2001+
// the callee has more arguments than the caller.
2002+
NumBytes = mergeSPUpdates(
2003+
MBB, MBBI, [NumBytes](int64_t Offset) { return NumBytes - Offset; },
2004+
true);
19802005

19812006
// Adjust stack pointer: ESP -= numbytes.
19822007

@@ -2457,7 +2482,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
24572482
if (HasFP) {
24582483
if (X86FI->hasSwiftAsyncContext()) {
24592484
// Discard the context.
2460-
int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2485+
int64_t Offset = mergeSPAdd(MBB, MBBI, 16, true);
24612486
emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
24622487
}
24632488
// Pop EBP.
@@ -2531,7 +2556,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
25312556
// If there is an ADD32ri or SUB32ri of ESP immediately before this
25322557
// instruction, merge the two instructions.
25332558
if (NumBytes || MFI.hasVarSizedObjects())
2534-
NumBytes += mergeSPUpdates(MBB, MBBI, true);
2559+
NumBytes = mergeSPAdd(MBB, MBBI, NumBytes, true);
25352560

25362561
// If dynamic alloca is used, then reset esp to point to the last callee-saved
25372562
// slot before popping them off! Same applies for the case, when stack was
@@ -2618,11 +2643,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
26182643

26192644
if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
26202645
// Add the return addr area delta back since we are not tail calling.
2621-
int Offset = -1 * X86FI->getTCReturnAddrDelta();
2646+
int64_t Offset = -1 * X86FI->getTCReturnAddrDelta();
26222647
assert(Offset >= 0 && "TCDelta should never be positive");
26232648
if (Offset) {
26242649
// Check for possible merge with preceding ADD instruction.
2625-
Offset += mergeSPUpdates(MBB, Terminator, true);
2650+
Offset = mergeSPAdd(MBB, Terminator, Offset, true);
26262651
emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
26272652
}
26282653
}
@@ -3822,8 +3847,8 @@ MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
38223847
// Merge with any previous or following adjustment instruction. Note: the
38233848
// instructions merged with here do not have CFI, so their stack
38243849
// adjustments do not feed into CfaAdjustment.
3825-
StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3826-
StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3850+
StackAdjustment = mergeSPAdd(MBB, InsertPos, StackAdjustment, true);
3851+
StackAdjustment = mergeSPAdd(MBB, InsertPos, StackAdjustment, false);
38273852

38283853
if (StackAdjustment) {
38293854
if (!(F.hasMinSize() &&

llvm/lib/Target/X86/X86FrameLowering.h

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,35 @@ class X86FrameLowering : public TargetFrameLowering {
134134
processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF,
135135
RegScavenger *RS) const override;
136136

137-
/// Check the instruction before/after the passed instruction. If
138-
/// it is an ADD/SUB/LEA instruction it is deleted argument and the
139-
/// stack adjustment is returned as a positive value for ADD/LEA and
140-
/// a negative for SUB.
141-
int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
142-
bool doMergeWithPrevious) const;
137+
private:
138+
/// Basic Pseudocode:
139+
/// if (instruction before/after the passed instruction is ADD/SUB/LEA)
140+
/// Offset = instruction stack adjustment
141+
/// ... positive value for ADD/LEA and negative for SUB
142+
/// return CalcNewOffset(Offset)
143+
/// else
144+
/// return CalcNewOffset(0)
145+
///
146+
/// It's possible that the selected instruction is not immediately
147+
/// before/after MBBI for large adjustments that have been split into multiple
148+
/// instructions.
149+
///
150+
/// CalcNewOffset should have the signature:
151+
/// int64_t CalcNewOffset(int64_t Offset)
152+
template <typename T>
153+
int64_t mergeSPUpdates(MachineBasicBlock &MBB,
154+
MachineBasicBlock::iterator &MBBI, T CalcNewOffset,
155+
bool doMergeWithPrevious) const;
156+
157+
public:
158+
/// Equivalent to:
159+
/// mergeSPUpdates(MBB, MBBI,
160+
/// [AddOffset](int64_t Offset) {
161+
/// return AddOffset + Offset;
162+
/// },
163+
/// doMergeWithPrevious);
164+
int64_t mergeSPAdd(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
165+
int64_t AddOffset, bool doMergeWithPrevious) const;
143166

144167
/// Emit a series of instructions to increment / decrement the stack
145168
/// pointer by a constant value.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; RUN: llc < %s -O3 -mtriple=x86_64-linux-unknown -verify-machineinstrs -o %t.s
2+
; RUN: FileCheck --input-file=%t.s %s
3+
4+
; Check that the stack update after calling bar gets merged into the second add
5+
; and not the first which is already at the chunk size limit (0x7FFFFFFF).
6+
7+
define void @foo(ptr %rhs) {
8+
; CHECK-LABEL: foo
9+
entry:
10+
%lhs = alloca [5 x [5 x [3 x [162 x [161 x [161 x double]]]]]], align 16
11+
store ptr %lhs, ptr %rhs, align 8
12+
%0 = call i32 @baz()
13+
call void @bar(i64 0, i64 0, i64 0, i64 0, i64 0, ptr null, ptr %rhs, ptr null, ptr %rhs)
14+
; CHECK: call{{.*}}bar
15+
; CHECK: addq{{.*}}$2147483647, %rsp
16+
; CHECK: addq{{.*}}$372037585, %rsp
17+
ret void
18+
}
19+
20+
declare void @bar(i64, i64, i64, i64, i64, ptr, ptr, ptr, ptr)
21+
22+
declare i32 @baz()
23+
24+
25+
; RUN: llvm-mc -triple x86_64-unknown-unknown %t.s

0 commit comments

Comments
 (0)