Skip to content

Commit 6fb7cdf

Browse files
[X86] Recognize POP/ADD/SUB modifying rsp in getSPAdjust. (#114265)
This code assumed only PUSHes would appear in call sequences. However, if calls require frame-pointer/base-pointer spills, only the PUSH operations inserted by spillFPBP will be recognized, and the adjustments to frame object offsets in prologepilog will be incorrect. This change correctly reports the SP adjustment for POP and ADD/SUB to rsp, and adds an assertion for unrecognized instructions that modify rsp.
1 parent a8a1e90 commit 6fb7cdf

File tree

4 files changed

+132
-3
lines changed

4 files changed

+132
-3
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -451,10 +451,13 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
451451
return -(I->getOperand(1).getImm());
452452
}
453453

454-
// Currently handle only PUSHes we can reasonably expect to see
455-
// in call sequences
454+
// Handle other opcodes we reasonably expect to see in call
455+
// sequences. Note this may include spill/restore of FP/BP.
456456
switch (MI.getOpcode()) {
457457
default:
458+
assert(!(MI.modifiesRegister(X86::RSP, &RI) ||
459+
MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) &&
460+
"Unhandled opcode in getSPAdjust");
458461
return 0;
459462
case X86::PUSH32r:
460463
case X86::PUSH32rmm:
@@ -466,6 +469,30 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
466469
case X86::PUSH64rmr:
467470
case X86::PUSH64i32:
468471
return 8;
472+
case X86::POP32r:
473+
case X86::POP32rmm:
474+
case X86::POP32rmr:
475+
return -4;
476+
case X86::POP64r:
477+
case X86::POP64rmm:
478+
case X86::POP64rmr:
479+
return -8;
480+
// FIXME: (implement and) use isAddImmediate in the
481+
// default case instead of the following ADD/SUB cases.
482+
case X86::ADD32ri:
483+
case X86::ADD32ri8:
484+
case X86::ADD64ri32:
485+
if (MI.getOperand(0).getReg() == X86::RSP &&
486+
MI.getOperand(1).getReg() == X86::RSP)
487+
return -MI.getOperand(2).getImm();
488+
return 0;
489+
case X86::SUB32ri:
490+
case X86::SUB32ri8:
491+
case X86::SUB64ri32:
492+
if (MI.getOperand(0).getReg() == X86::RSP &&
493+
MI.getOperand(1).getReg() == X86::RSP)
494+
return MI.getOperand(2).getImm();
495+
return 0;
469496
}
470497
}
471498

llvm/lib/Target/X86/X86MachineFunctionInfo.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ using namespace llvm;
1515

1616
yaml::X86MachineFunctionInfo::X86MachineFunctionInfo(
1717
const llvm::X86MachineFunctionInfo &MFI)
18-
: AMXProgModel(MFI.getAMXProgModel()) {}
18+
: AMXProgModel(MFI.getAMXProgModel()),
19+
FPClobberedByCall(MFI.getFPClobberedByCall()),
20+
HasPushSequences(MFI.getHasPushSequences()) {}
1921

2022
void yaml::X86MachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
2123
MappingTraits<X86MachineFunctionInfo>::mapping(YamlIO, *this);
@@ -31,6 +33,8 @@ MachineFunctionInfo *X86MachineFunctionInfo::clone(
3133
void X86MachineFunctionInfo::initializeBaseYamlFields(
3234
const yaml::X86MachineFunctionInfo &YamlMFI) {
3335
AMXProgModel = YamlMFI.AMXProgModel;
36+
FPClobberedByCall = YamlMFI.FPClobberedByCall;
37+
HasPushSequences = YamlMFI.HasPushSequences;
3438
}
3539

3640
void X86MachineFunctionInfo::anchor() { }

llvm/lib/Target/X86/X86MachineFunctionInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ template <> struct ScalarEnumerationTraits<AMXProgModelEnum> {
3838

3939
struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo {
4040
AMXProgModelEnum AMXProgModel;
41+
bool FPClobberedByCall;
42+
bool HasPushSequences;
4143

4244
X86MachineFunctionInfo() = default;
4345
X86MachineFunctionInfo(const llvm::X86MachineFunctionInfo &MFI);
@@ -49,6 +51,8 @@ struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo {
4951
template <> struct MappingTraits<X86MachineFunctionInfo> {
5052
static void mapping(IO &YamlIO, X86MachineFunctionInfo &MFI) {
5153
YamlIO.mapOptional("amxProgModel", MFI.AMXProgModel);
54+
YamlIO.mapOptional("FPClobberedByCall", MFI.FPClobberedByCall, false);
55+
YamlIO.mapOptional("hasPushSequences", MFI.HasPushSequences, false);
5256
}
5357
};
5458
} // end namespace yaml

llvm/test/CodeGen/X86/pr114265.mir

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# The change being tested here is that X86InstrInfo's getSPAdjust correctly handles POP/ADD instructions within
2+
# call sequences, as previously it assumed only PUSHes would be present for parameter passing.
3+
# What this test actually does is recreate a situation where:
4+
# - something other than a PUSH appears in a call sequence, and
5+
# - failing to recognize the SP adjustment by such an instruction actually changes something
6+
# observable.
7+
#
8+
# To this end, we create a situation where:
9+
# - the FP must be spilled around calls
10+
# - a frame object is stored before a call frame and loaded in the call frame
11+
# (emulating an argument restored from spill), following a call which POPs something
12+
# - call-frame pseudos can *not* be simplified early in prologepilog
13+
#
14+
# The issue being corrected is the case where prologepilog sees the SP adjustment of PUSHes only, and not
15+
# POP/ADD. This adjustment value can be carried over and incorrectly applied to frame offsets. So,
16+
# in the following we ensure that references to a frame object carry the same offset.
17+
#
18+
# NB:
19+
# FPClobberedByCall and hasPushSequences have to be supplied in the MFI section. The former
20+
# is required to force spill of the FP, and the latter ensures call-frame pseudos are not simplified.
21+
#
22+
# The csr_64_intel_ocl_bi_avx512 regmask is used to ensure that the FP is spilled. Other csr's may
23+
# achieve the same.
24+
#
25+
# RUN: llc -mtriple x86_64-unknown-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
26+
---
27+
name: f
28+
alignment: 16
29+
exposesReturnsTwice: false
30+
legalized: false
31+
regBankSelected: false
32+
selected: false
33+
failedISel: false
34+
tracksRegLiveness: true
35+
hasWinCFI: false
36+
callsEHReturn: false
37+
callsUnwindInit: false
38+
hasEHCatchret: false
39+
hasEHScopes: false
40+
hasEHFunclets: false
41+
isOutlined: false
42+
debugInstrRef: true
43+
failsVerification: false
44+
tracksDebugUserValues: true
45+
registers: []
46+
liveins:
47+
- { reg: '$rdi', virtual-reg: '' }
48+
- { reg: '$rsi', virtual-reg: '' }
49+
frameInfo:
50+
isFrameAddressTaken: false
51+
isReturnAddressTaken: false
52+
hasStackMap: false
53+
hasPatchPoint: false
54+
stackSize: 0
55+
offsetAdjustment: 0
56+
maxAlignment: 64
57+
adjustsStack: true
58+
hasCalls: true
59+
stackProtector: ''
60+
functionContext: ''
61+
maxCallFrameSize: 4294967295
62+
cvBytesOfCalleeSavedRegisters: 0
63+
hasOpaqueSPAdjustment: false
64+
hasVAStart: false
65+
hasMustTailInVarArgFunc: false
66+
hasTailCall: false
67+
isCalleeSavedInfoValid: false
68+
localFrameSize: 0
69+
savePoint: ''
70+
restorePoint: ''
71+
fixedStack: []
72+
stack:
73+
- { id: 0, name: '', type: spill-slot, offset: 0, size: 64,
74+
alignment: 32, stack-id: default, callee-saved-register: '', callee-saved-restored: true,
75+
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
76+
machineFunctionInfo:
77+
FPClobberedByCall: true
78+
hasPushSequences: true
79+
body: |
80+
bb.0:
81+
liveins: $rdi, $rsi
82+
MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $rdi :: (store (s64) into %stack.0)
83+
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
84+
CALL64r renamable undef $rsi, csr_64_intel_ocl_bi_avx512, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
85+
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
86+
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
87+
$rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
88+
$rdi = COPY renamable $rax
89+
CALL64r renamable undef $rsi, csr_64_intel_ocl_bi_avx512, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
90+
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
91+
...
92+
# Ensure the store to and the load from the frame object have matching offsets after resolution.
93+
# CHECK: MOV64mr $rsp, 1, $noreg, [[DISP:[1-9][0-9]+]]
94+
# CHECK: MOV64rm $rsp, 1, $noreg, [[DISP]]

0 commit comments

Comments
 (0)