Skip to content

[X86] Recognize POP/ADD/SUB modifying rsp in getSPAdjust. #114265

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,10 +452,13 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
return -(I->getOperand(1).getImm());
}

// Currently handle only PUSHes we can reasonably expect to see
// in call sequences
// Handle other opcodes we reasonably expect to see in call
// sequences. Note this may include spill/restore of FP/BP.
switch (MI.getOpcode()) {
default:
assert(!(MI.modifiesRegister(X86::RSP, &RI) ||
MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) &&
"Unhandled opcode in getSPAdjust");
return 0;
case X86::PUSH32r:
case X86::PUSH32rmm:
Expand All @@ -467,6 +470,30 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
case X86::PUSH64rmr:
case X86::PUSH64i32:
return 8;
case X86::POP32r:
case X86::POP32rmm:
case X86::POP32rmr:
return -4;
case X86::POP64r:
case X86::POP64rmm:
case X86::POP64rmr:
return -8;
// FIXME: (implement and) use isAddImmediate in the
// default case instead of the following ADD/SUB cases.
case X86::ADD32ri:
case X86::ADD32ri8:
case X86::ADD64ri32:
if (MI.getOperand(0).getReg() == X86::RSP &&
MI.getOperand(1).getReg() == X86::RSP)
return -MI.getOperand(2).getImm();
return 0;
case X86::SUB32ri:
case X86::SUB32ri8:
case X86::SUB64ri32:
if (MI.getOperand(0).getReg() == X86::RSP &&
MI.getOperand(1).getReg() == X86::RSP)
return MI.getOperand(2).getImm();
return 0;
}
}

Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ using namespace llvm;

yaml::X86MachineFunctionInfo::X86MachineFunctionInfo(
const llvm::X86MachineFunctionInfo &MFI)
: AMXProgModel(MFI.getAMXProgModel()) {}
: AMXProgModel(MFI.getAMXProgModel()),
FPClobberedByCall(MFI.getFPClobberedByCall()),
HasPushSequences(MFI.getHasPushSequences()) {}

void yaml::X86MachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
MappingTraits<X86MachineFunctionInfo>::mapping(YamlIO, *this);
Expand All @@ -31,6 +33,8 @@ MachineFunctionInfo *X86MachineFunctionInfo::clone(
void X86MachineFunctionInfo::initializeBaseYamlFields(
const yaml::X86MachineFunctionInfo &YamlMFI) {
AMXProgModel = YamlMFI.AMXProgModel;
FPClobberedByCall = YamlMFI.FPClobberedByCall;
HasPushSequences = YamlMFI.HasPushSequences;
}

void X86MachineFunctionInfo::anchor() { }
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86MachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ template <> struct ScalarEnumerationTraits<AMXProgModelEnum> {

struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo {
AMXProgModelEnum AMXProgModel;
bool FPClobberedByCall;
bool HasPushSequences;

X86MachineFunctionInfo() = default;
X86MachineFunctionInfo(const llvm::X86MachineFunctionInfo &MFI);
Expand All @@ -49,6 +51,8 @@ struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo {
template <> struct MappingTraits<X86MachineFunctionInfo> {
static void mapping(IO &YamlIO, X86MachineFunctionInfo &MFI) {
YamlIO.mapOptional("amxProgModel", MFI.AMXProgModel);
YamlIO.mapOptional("FPClobberedByCall", MFI.FPClobberedByCall, false);
YamlIO.mapOptional("hasPushSequences", MFI.HasPushSequences, false);
}
};
} // end namespace yaml
Expand Down
94 changes: 94 additions & 0 deletions llvm/test/CodeGen/X86/pr114265.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# The change being tested here is that X86InstrInfo's getSPAdjust correctly handles POP/ADD instructions within
# call sequences, as previously it assumed only PUSHes would be present for parameter passing.
# What this test actually does is recreate a situation where:
# - something other than a PUSH appears in a call sequence, and
# - failing to recognize the SP adjustment by such an instruction actually changes something
# observable.
#
# To this end, we create a situation where:
# - the FP must be spilled around calls
# - a frame object is stored before a call frame and loaded in the call frame
# (emulating an argument restored from spill), following a call which POPs something
# - call-frame pseudos can *not* be simplified early in prologepilog
#
# The issue being corrected is the case where prologepilog sees the SP adjustment of PUSHes only, and not
# POP/ADD. This adjustment value can be carried over and incorrectly applied to frame offsets. So,
# in the following we ensure that references to a frame object carry the same offset.
#
# NB:
# FPClobberedByCall and hasPushSequence have to be supplied in the MFI section. The former
# is required to force spill of the FP, and the latter ensures call-frame pseudos are not simplified.
#
# The csr_64_intel_ocl_bi_avx512 regmask is used to ensure that the FP is spilled. Other csr's may
# acheive the same.
#
# RUN: llc -mtriple x86_64-unknown-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
---
name: f
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
callsEHReturn: false
callsUnwindInit: false
hasEHCatchret: false
hasEHScopes: false
hasEHFunclets: false
isOutlined: false
debugInstrRef: true
failsVerification: false
tracksDebugUserValues: true
registers: []
liveins:
- { reg: '$rdi', virtual-reg: '' }
- { reg: '$rsi', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 64
adjustsStack: true
hasCalls: true
stackProtector: ''
functionContext: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
isCalleeSavedInfoValid: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 64,
alignment: 32, stack-id: default, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
machineFunctionInfo:
FPClobberedByCall: true
hasPushSequences: true
body: |
bb.0:
liveins: $rdi, $rsi
MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $rdi :: (store (s64) into %stack.0)
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
CALL64r renamable undef $rsi, csr_64_intel_ocl_bi_avx512, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
$rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
$rdi = COPY renamable $rax
CALL64r renamable undef $rsi, csr_64_intel_ocl_bi_avx512, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
...
# ensure the store and load to the frame object have matching offsets after resolution.
# CHECK: MOV64mr $rsp, 1, $noreg, [[DISP:[1-9][0-9]+]]
# CHECK: MOV64rm $rsp, 1, $noreg, [[DISP]]
Loading