Skip to content

[X86,SimplifyCFG] Allow more PHIs when sinking common code on target supports CMOV #110420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1114,9 +1114,17 @@ class TargetTransformInfo {
/// \return the number of registers in the target-provided register class.
unsigned getNumberOfRegisters(unsigned ClassID) const;

enum class MoveType {
NoMem = 1,
MemLD = 2,
MemST = 4,
All = 7,
};

/// \return true if the target supports load/store that enables fault
/// suppression of memory operands when the source condition is false.
bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const;
bool hasConditionalMoveForType(Type *Ty = nullptr,
MoveType MT = MoveType::NoMem) const;

/// \return the target-provided register class ID for the provided type,
/// accounting for type promotion and other type-legalization techniques that
Expand Down Expand Up @@ -1978,7 +1986,9 @@ class TargetTransformInfo::Concept {
virtual bool preferToKeepConstantsAttached(const Instruction &Inst,
const Function &Fn) const = 0;
virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
virtual bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const = 0;
virtual bool
hasConditionalMoveForType(Type *Ty = nullptr,
MoveType MT = MoveType::NoMem) const = 0;
virtual unsigned getRegisterClassForType(bool Vector,
Type *Ty = nullptr) const = 0;
virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
Expand Down Expand Up @@ -2571,8 +2581,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
unsigned getNumberOfRegisters(unsigned ClassID) const override {
return Impl.getNumberOfRegisters(ClassID);
}
bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const override {
return Impl.hasConditionalLoadStoreForType(Ty);
bool hasConditionalMoveForType(Type *Ty = nullptr,
MoveType MT = MoveType::NoMem) const override {
return Impl.hasConditionalMoveForType(Ty, MT);
}
unsigned getRegisterClassForType(bool Vector,
Type *Ty = nullptr) const override {
Expand Down
5 changes: 4 additions & 1 deletion llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,10 @@ class TargetTransformInfoImplBase {
}

unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
bool hasConditionalLoadStoreForType(Type *Ty) const { return false; }
bool hasConditionalMoveForType(Type *Ty,
TargetTransformInfo::MoveType MT) const {
return false;
}

unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
return Vector ? 1 : 0;
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -727,8 +727,9 @@ unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
return TTIImpl->getNumberOfRegisters(ClassID);
}

bool TargetTransformInfo::hasConditionalLoadStoreForType(Type *Ty) const {
return TTIImpl->hasConditionalLoadStoreForType(Ty);
bool TargetTransformInfo::hasConditionalMoveForType(Type *Ty,
MoveType MT) const {
return TTIImpl->hasConditionalMoveForType(Ty, MT);
}

unsigned TargetTransformInfo::getRegisterClassForType(bool Vector,
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4806,7 +4806,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
SDValue StoreNode =
!IsCompressing &&
TTI.hasConditionalLoadStoreForType(I.getArgOperand(0)->getType())
TTI.hasConditionalMoveForType(I.getArgOperand(0)->getType(),
TargetTransformInfo::MoveType::All)
? TLI.visitMaskedStore(DAG, sdl, getMemoryRoot(), MMO, Ptr, Src0,
Mask)
: DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask,
Expand Down Expand Up @@ -4992,7 +4993,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDValue Load;
SDValue Res;
if (!IsExpanding &&
TTI.hasConditionalLoadStoreForType(Src0Operand->getType()))
TTI.hasConditionalMoveForType(Src0Operand->getType(),
TargetTransformInfo::MoveType::All))
Res = TLI.visitMaskedLoad(DAG, sdl, InChain, MMO, Load, Ptr, Src0, Mask);
else
Res = Load =
Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,11 @@ unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const {
return 8;
}

bool X86TTIImpl::hasConditionalLoadStoreForType(Type *Ty) const {
if (!ST->hasCF())
bool X86TTIImpl::hasConditionalMoveForType(
Type *Ty, TargetTransformInfo::MoveType MT) const {
if (!ST->canUseCMOV())
return false;
if (MT != TargetTransformInfo::MoveType::NoMem && !ST->hasCF())
return false;
if (!Ty)
return true;
Expand Down Expand Up @@ -6064,7 +6067,7 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) {

// The backend can't handle a single element vector w/o CFCMOV.
if (isa<VectorType>(DataTy) && cast<FixedVectorType>(DataTy)->getNumElements() == 1)
return ST->hasCF() && hasConditionalLoadStoreForType(ScalarTy);
return ST->hasCF() && hasConditionalMoveForType(ScalarTy);

if (!ST->hasAVX())
return false;
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/X86/X86TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,10 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
/// @{

unsigned getNumberOfRegisters(unsigned ClassID) const;
bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const;
bool
hasConditionalMoveForType(Type *Ty = nullptr,
TargetTransformInfo::MoveType MT =
TargetTransformInfo::MoveType::NoMem) const;
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
unsigned getMaxInterleaveFactor(ElementCount VF);
Expand Down
22 changes: 16 additions & 6 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
"to speculatively execute to eliminate conditional branch "
"(default = 6)"));

static cl::opt<unsigned> ProfitableToSinkInstructionThreshold(
"profitable-to-sink-instruction-threshold", cl::Hidden, cl::init(6),
cl::desc("Control the maximal PHI instructions"));

static cl::opt<unsigned>
HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
cl::init(20),
Expand Down Expand Up @@ -1742,7 +1746,8 @@ static bool isSafeCheapLoadStore(const Instruction *I,
// llvm.masked.load/store use i32 for alignment while load/store use i64.
// That's why we have the alignment limitation.
// FIXME: Update the prototype of the intrinsics?
return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
return TTI.hasConditionalMoveForType(getLoadStoreType(I),
TargetTransformInfo::MoveType::All) &&
getLoadStoreAlignment(I) < Value::MaximumAlignment;
}

Expand Down Expand Up @@ -2386,8 +2391,8 @@ namespace {

/// Check whether BB's predecessors end with unconditional branches. If it is
/// true, sink any common code from the predecessors to BB.
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
DomTreeUpdater *DTU) {
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU,
const TargetTransformInfo &TTI) {
// We support two situations:
// (1) all incoming arcs are unconditional
// (2) there are non-unconditional incoming arcs
Expand Down Expand Up @@ -2492,12 +2497,16 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
// sink?
auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
unsigned NumPHIInsts = 0;
unsigned NumCmovInsts = 0;
for (Use &U : (*LRI)[0]->operands()) {
auto It = PHIOperands.find(&U);
if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
return InstructionsToSink.contains(V);
})) {
++NumPHIInsts;
if (TTI.hasConditionalMoveForType(U->getType()))
++NumCmovInsts;
else
++NumPHIInsts;
// Do not separate a load/store from the gep producing the address.
// The gep can likely be folded into the load/store as an addressing
// mode. Additionally, a load of a gep is easier to analyze than a
Expand All @@ -2511,7 +2520,8 @@ static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
}
}
LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
return NumPHIInsts <= 1;
return NumPHIInsts <= 1 &&
NumCmovInsts < ProfitableToSinkInstructionThreshold;
};

// We've determined that we are going to sink last ScanIdx instructions,
Expand Down Expand Up @@ -8119,7 +8129,7 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
return true;

if (SinkCommon && Options.SinkCommonInsts)
if (sinkCommonCodeFromPredecessors(BB, DTU) ||
if (sinkCommonCodeFromPredecessors(BB, DTU, TTI) ||
mergeCompatibleInvokes(BB, DTU)) {
// sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
// so we may now how duplicate PHI's.
Expand Down
Loading
Loading