
[X86][APX] Suppress EGPR/NDD instructions for relocations #136660


Merged · 10 commits · Apr 29, 2025
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/CMakeLists.txt
@@ -78,6 +78,7 @@ set(sources
X86SpeculativeLoadHardening.cpp
X86SpeculativeExecutionSideEffectSuppression.cpp
X86Subtarget.cpp
X86SuppressAPXForReloc.cpp
X86TargetMachine.cpp
X86TargetObjectFile.cpp
X86TargetTransformInfo.cpp
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86.h
@@ -169,6 +169,7 @@ FunctionPass *createX86LoadValueInjectionRetHardeningPass();
FunctionPass *createX86SpeculativeLoadHardeningPass();
FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
FunctionPass *createX86ArgumentStackSlotPass();
FunctionPass *createX86SuppressAPXForRelocationPass();

void initializeCompressEVEXPassPass(PassRegistry &);
void initializeFPSPass(PassRegistry &);
@@ -204,6 +205,7 @@ void initializeX86ReturnThunksPass(PassRegistry &);
void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &);
void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
void initializeX86TileConfigPass(PassRegistry &);
void initializeX86SuppressAPXForRelocationPassPass(PassRegistry &);

namespace X86AS {
enum : unsigned {
9 changes: 9 additions & 0 deletions llvm/lib/Target/X86/X86CompressEVEX.cpp
@@ -58,6 +58,8 @@ using namespace llvm;

#define DEBUG_TYPE COMP_EVEX_NAME

extern cl::opt<bool> X86EnableAPXForRelocation;

namespace {
// Including the generated EVEX compression tables.
#define GET_X86_COMPRESS_EVEX_TABLE
@@ -252,6 +254,13 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
if (MI.definesRegister(Super, /*TRI=*/nullptr))
IsRedundantNDD = false;
}

// ADDrm/mr instructions with NDD + relocation have been transformed to
// instructions without NDD in the X86SuppressAPXForRelocation pass. This is
// to keep backward compatibility with linkers without APX support.
if (!X86EnableAPXForRelocation)
assert(!isAddMemInstrWithRelocation(MI) &&
"Unexpected NDD instruction with relocation!");
}

// NonNF -> NF only if it's not a compressible NDD instruction and eflags is
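The assertion added above relies on the new X86SuppressAPXForRelocation pass (registered in the CMakeLists.txt and X86.h changes in this PR, but whose implementation is not shown in this excerpt) having already rewritten NDD adds that carry a relocation. A minimal conceptual sketch of such a rewrite, assuming the standard BuildMI/MachineInstrBuilder APIs and not taken from the actual pass (which also handles ADD64mr_ND and preserves flag state), might look like:

```cpp
#include "X86InstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Conceptual sketch only: turn an NDD ADD64rm_ND whose displacement carries a
// relocation back into the legacy two-address ADD64rm, so the encoding stays
// usable by linkers that do not understand APX relocation types.
static void rewriteNDDAddToLegacy(MachineInstr &MI, const X86InstrInfo *TII) {
  assert(MI.getOpcode() == X86::ADD64rm_ND && "expected an NDD add");
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  // Legacy ADD64rm ties the destination to the first source, so materialize
  // the source into the destination register first when they differ.
  if (Dst != Src)
    BuildMI(MBB, MI, DL, TII->get(TargetOpcode::COPY), Dst).addReg(Src);

  MachineInstrBuilder MIB =
      BuildMI(MBB, MI, DL, TII->get(X86::ADD64rm), Dst).addReg(Dst);
  // Forward the five memory operands, including the relocated displacement.
  for (unsigned I = 2; I != 2 + X86::AddrNumOperands; ++I)
    MIB.add(MI.getOperand(I));
  MI.eraseFromParent();
}
```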
12 changes: 11 additions & 1 deletion llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -66,6 +66,8 @@ STATISTIC(NumTestsInserted, "Number of test instructions inserted");
STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
STATISTIC(NumNFsConvertedTo, "Number of NF instructions converted to");

extern cl::opt<bool> X86EnableAPXForRelocation;

namespace {

// Convenient array type for storing registers associated with each condition.
@@ -242,7 +244,15 @@ static EFLAGSClobber getClobberType(const MachineInstr &MI) {
MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
if (!FlagDef)
return NoClobber;
if (FlagDef->isDead() && X86::getNFVariant(MI.getOpcode()))

// For ADDrm/ADDmr instructions with relocation, skip the optimization of
// replacing non-NF with NF. This is to keep backward compatibility with old
// versions of linkers without APX relocation type support on Linux.
bool IsWithReloc =
X86EnableAPXForRelocation ? false : isAddMemInstrWithRelocation(MI);

if (FlagDef->isDead() && X86::getNFVariant(MI.getOpcode()) && !IsWithReloc)
return EvitableClobber;

return InevitableClobber;
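The guard applied here, and again in optimizeCompareInstr below, can be summarized as a single condition. The following is an illustrative restatement using the names introduced by the patch, not code from the patch itself:

```cpp
#include "X86InstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Illustrative only: the condition both passes check before converting an
// EFLAGS-dead instruction to its NF (flag-suppressing) variant.
static bool canUseNFVariant(const MachineInstr &MI, bool EnableAPXForReloc) {
  // A relocated ADDrm/ADDmr must keep its legacy, linker-relaxable encoding
  // when APX relocations are disabled, so it is never converted.
  if (!EnableAPXForReloc && isAddMemInstrWithRelocation(MI))
    return false;
  return X86::getNFVariant(MI.getOpcode()) != 0;
}
```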
30 changes: 13 additions & 17 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -53,6 +53,8 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "X86GenInstrInfo.inc"

extern cl::opt<bool> X86EnableAPXForRelocation;

static cl::opt<bool>
NoFusing("disable-spill-fusing",
cl::desc("Disable fusing of spill code into instructions"),
@@ -102,22 +104,8 @@ X86InstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
if (X86II::canUseApxExtendedReg(MCID))
return RC;

switch (RC->getID()) {
default:
return RC;
case X86::GR8RegClassID:
return &X86::GR8_NOREX2RegClass;
case X86::GR16RegClassID:
return &X86::GR16_NOREX2RegClass;
case X86::GR32RegClassID:
return &X86::GR32_NOREX2RegClass;
case X86::GR64RegClassID:
return &X86::GR64_NOREX2RegClass;
case X86::GR32_NOSPRegClassID:
return &X86::GR32_NOREX2_NOSPRegClass;
case X86::GR64_NOSPRegClassID:
return &X86::GR64_NOREX2_NOSPRegClass;
}
const X86RegisterInfo *RI = Subtarget.getRegisterInfo();
return RI->constrainRegClassToNonRex2(RC);
}

bool X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
@@ -5480,8 +5468,16 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
continue;
}

// For ADDrm/ADDmr instructions with relocation, skip the optimization of
// replacing non-NF with NF. This is to keep backward compatibility with old
// versions of linkers without APX relocation type support on Linux.
bool IsWithReloc = X86EnableAPXForRelocation
? false
: isAddMemInstrWithRelocation(Inst);

// Try to replace non-NF with NF instructions.
if (HasNF && Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
if (HasNF && Inst.registerDefIsDead(X86::EFLAGS, TRI) && !IsWithReloc) {
unsigned NewOp = X86::getNFVariant(Inst.getOpcode());
if (!NewOp)
return false;
13 changes: 13 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.h
@@ -174,6 +174,19 @@ inline static bool isMem(const MachineInstr &MI, unsigned Op) {
MI.getOperand(Op + X86::AddrSegmentReg).isReg() && isLeaMem(MI, Op);
}

inline static bool isAddMemInstrWithRelocation(const MachineInstr &MI) {
unsigned Op = MI.getOpcode();
if (Op == X86::ADD64rm || Op == X86::ADD64mr_ND || Op == X86::ADD64rm_ND) {
int MemOpNo = X86II::getMemoryOperandNo(MI.getDesc().TSFlags) +
X86II::getOperandBias(MI.getDesc());
const MachineOperand &MO = MI.getOperand(X86::AddrDisp + MemOpNo);
if (MO.getTargetFlags() == X86II::MO_GOTTPOFF)
return true;
}

return false;
}

class X86InstrInfo final : public X86GenInstrInfo {
X86Subtarget &Subtarget;
const X86RegisterInfo RI;
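For context on the form the new isAddMemInstrWithRelocation helper matches: it fires on an add whose displacement operand is a global address tagged X86II::MO_GOTTPOFF, as produced for TLS initial-exec accesses. A hedged sketch of constructing such an instruction with the standard MachineInstrBuilder API (buildGottpoffAdd is hypothetical and not part of the patch):

```cpp
#include "X86InstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/GlobalValue.h"

using namespace llvm;

// Illustrative only: build the shape of instruction that
// isAddMemInstrWithRelocation() matches -- a RIP-relative ADD64rm whose
// displacement is a global address carrying the MO_GOTTPOFF target flag.
static MachineInstr *buildGottpoffAdd(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator InsertPt,
                                      const DebugLoc &DL,
                                      const X86InstrInfo *TII, Register Dst,
                                      Register Src, const GlobalValue *GV) {
  return BuildMI(MBB, InsertPt, DL, TII->get(X86::ADD64rm), Dst)
      .addReg(Src)                                            // tied source
      .addReg(X86::RIP)                                       // base
      .addImm(1)                                              // scale
      .addReg(0)                                              // no index
      .addGlobalAddress(GV, /*Offset=*/0, X86II::MO_GOTTPOFF) // disp + reloc
      .addReg(0)                                              // no segment
      .getInstr();
}
```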
20 changes: 20 additions & 0 deletions llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1237,3 +1237,23 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,

return true;
}

const TargetRegisterClass *X86RegisterInfo::constrainRegClassToNonRex2(
const TargetRegisterClass *RC) const {
switch (RC->getID()) {
default:
return RC;
case X86::GR8RegClassID:
return &X86::GR8_NOREX2RegClass;
case X86::GR16RegClassID:
return &X86::GR16_NOREX2RegClass;
case X86::GR32RegClassID:
return &X86::GR32_NOREX2RegClass;
case X86::GR64RegClassID:
return &X86::GR64_NOREX2RegClass;
case X86::GR32_NOSPRegClassID:
return &X86::GR32_NOREX2_NOSPRegClass;
case X86::GR64_NOSPRegClassID:
return &X86::GR64_NOREX2_NOSPRegClass;
}
}
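The only in-tree caller added by this PR is the X86InstrInfo::getRegClass refactor shown above, but the intent of the new hook can be sketched with a small hedged example (pickAllocatableClass is a hypothetical wrapper, not part of the patch):

```cpp
#include "X86RegisterInfo.h"

using namespace llvm;

// Illustrative only: when an instruction cannot encode APX extended registers
// (for example because its relocation must stay legacy-encodable), shrink the
// candidate class to the matching *_NOREX2 class so the register allocator
// never hands out R16-R31.
static const TargetRegisterClass *
pickAllocatableClass(const TargetRegisterClass *RC, bool CanUseEGPR,
                     const X86RegisterInfo &TRI) {
  return CanUseEGPR ? RC : TRI.constrainRegClassToNonRex2(RC);
}
```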
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86RegisterInfo.h
@@ -171,6 +171,9 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;

const TargetRegisterClass *
constrainRegClassToNonRex2(const TargetRegisterClass *RC) const;
};

} // End llvm namespace