llvm · e-kud · Nov 14, 2024 · Oct 30, 2024 · Oct 31, 2024
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -452,10 +452,13 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
     return -(I->getOperand(1).getImm());
   }
 
-  // Currently handle only PUSHes we can reasonably expect to see
-  // in call sequences
+  // Handle other opcodes we reasonably expect to see in call
+  // sequences. Note this may include spill/restore of FP/BP.
   switch (MI.getOpcode()) {
   default:
+    assert(!(MI.modifiesRegister(X86::RSP, &RI) ||
+             MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) &&
+           "Unhandled opcode in getSPAdjust");
     return 0;
   case X86::PUSH32r:
   case X86::PUSH32rmm:
@@ -467,6 +470,30 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
   case X86::PUSH64rmr:
   case X86::PUSH64i32:
     return 8;
+  case X86::POP32r:
+  case X86::POP32rmm:
+  case X86::POP32rmr:
+    return -4;
+  case X86::POP64r:
+  case X86::POP64rmm:
+  case X86::POP64rmr:
+    return -8;
+  // FIXME: (implement and) use isAddImmediate in the
+  // default case instead of the following ADD/SUB cases.
+  case X86::ADD32ri:
+  case X86::ADD32ri8:
+  case X86::ADD64ri32:
+    if (MI.getOperand(0).getReg() == X86::RSP &&
+        MI.getOperand(1).getReg() == X86::RSP)
+      return -MI.getOperand(2).getImm();
+    return 0;
+  case X86::SUB32ri:
+  case X86::SUB32ri8:
+  case X86::SUB64ri32:
+    if (MI.getOperand(0).getReg() == X86::RSP &&
+        MI.getOperand(1).getReg() == X86::RSP)
+      return MI.getOperand(2).getImm();
+    return 0;
   }
 }
 

diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -15,7 +15,9 @@ using namespace llvm;
 
 yaml::X86MachineFunctionInfo::X86MachineFunctionInfo(
     const llvm::X86MachineFunctionInfo &MFI)
-    : AMXProgModel(MFI.getAMXProgModel()) {}
+    : AMXProgModel(MFI.getAMXProgModel()),
+      FPClobberedByCall(MFI.getFPClobberedByCall()),
+      HasPushSequences(MFI.getHasPushSequences()) {}
 
 void yaml::X86MachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
   MappingTraits<X86MachineFunctionInfo>::mapping(YamlIO, *this);
@@ -31,6 +33,8 @@ MachineFunctionInfo *X86MachineFunctionInfo::clone(
 void X86MachineFunctionInfo::initializeBaseYamlFields(
     const yaml::X86MachineFunctionInfo &YamlMFI) {
   AMXProgModel = YamlMFI.AMXProgModel;
+  FPClobberedByCall = YamlMFI.FPClobberedByCall;
+  HasPushSequences = YamlMFI.HasPushSequences;
 }
 
 void X86MachineFunctionInfo::anchor() { }

diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -38,6 +38,8 @@ template <> struct ScalarEnumerationTraits<AMXProgModelEnum> {
 
 struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo {
   AMXProgModelEnum AMXProgModel;
+  bool FPClobberedByCall;
+  bool HasPushSequences;
 
   X86MachineFunctionInfo() = default;
   X86MachineFunctionInfo(const llvm::X86MachineFunctionInfo &MFI);
@@ -49,6 +51,8 @@ struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo {
 template <> struct MappingTraits<X86MachineFunctionInfo> {
   static void mapping(IO &YamlIO, X86MachineFunctionInfo &MFI) {
     YamlIO.mapOptional("amxProgModel", MFI.AMXProgModel);
+    YamlIO.mapOptional("FPClobberedByCall", MFI.FPClobberedByCall, false);
+    YamlIO.mapOptional("hasPushSequences", MFI.HasPushSequences, false);
   }
 };
 } // end namespace yaml

diff --git a/llvm/test/CodeGen/X86/pr114265.mir b/llvm/test/CodeGen/X86/pr114265.mir
@@ -0,0 +1,94 @@
+# The change being tested here is that X86InstrInfo's getSPAdjust correctly handles POP/ADD instructions within
+# call sequences, as previously it assumed only PUSHes would be present for parameter passing.
+# What this test actually does is recreate a situation where:
+#  - something other than a PUSH appears in a call sequence, and
+#  - failing to recognize the SP adjustment by such an instruction actually changes something
+#    observable.
+#
+# To this end, we create a situation where:
+#  - the FP must be spilled around calls
+#  - a frame object is stored before a call frame and loaded in the call frame 
+#    (emulating an argument restored from spill), following a call which POPs something
+#  - call-frame pseudos can *not* be simplified early in prologepilog
+#
+# The issue being corrected is the case where prologepilog sees the SP adjustment of PUSHes only, and not
+# POP/ADD. This adjustment value can be carried over and incorrectly applied to frame offsets. So,
+# in the following we ensure that references to a frame object carry the same offset.
+#
+# NB:
+#  FPClobberedByCall and hasPushSequence have to be supplied in the MFI section. The former
+#  is required to force spill of the FP, and the latter ensures call-frame pseudos are not simplified.
+#
+#  The csr_64_intel_ocl_bi_avx512 regmask is used to ensure that the FP is spilled. Other csr's may
+#  acheive the same.
+#
+# RUN: llc -mtriple x86_64-unknown-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s 
+---
+name:            f
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   true
+failsVerification: false
+tracksDebugUserValues: true
+registers:       []
+liveins:
+  - { reg: '$rdi', virtual-reg: '' }
+  - { reg: '$rsi', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    64
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, 
+      alignment: 32, stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+  FPClobberedByCall:  true
+  hasPushSequences: true
+body:             |
+  bb.0:
+    liveins: $rdi, $rsi
+    MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $rdi :: (store (s64) into %stack.0)
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    CALL64r renamable undef $rsi, csr_64_intel_ocl_bi_avx512, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
+    $rdi = COPY renamable $rax
+    CALL64r renamable undef $rsi, csr_64_intel_ocl_bi_avx512, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+...
+# ensure the store and load to the frame object have matching offsets after resolution.
+# CHECK: MOV64mr $rsp, 1, $noreg, [[DISP:[1-9][0-9]+]]
+# CHECK: MOV64rm $rsp, 1, $noreg, [[DISP]]