Skip to content

[X86,SimplifyCFG] Support hoisting load/store with conditional faulting (Part I) #96878

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ struct SimplifyCFGOptions {
bool ConvertSwitchToLookupTable = false;
bool NeedCanonicalLoop = true;
bool HoistCommonInsts = false;
bool HoistLoadsStoresWithCondFaulting = false;
bool SinkCommonInsts = false;
bool SimplifyCondBranch = true;
bool SpeculateBlocks = true;
Expand Down Expand Up @@ -59,6 +60,10 @@ struct SimplifyCFGOptions {
HoistCommonInsts = B;
return *this;
}
SimplifyCFGOptions &hoistLoadsStoresWithCondFaulting(bool B) {
HoistLoadsStoresWithCondFaulting = B;
return *this;
}
SimplifyCFGOptions &sinkCommonInsts(bool B) {
SinkCommonInsts = B;
return *this;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Passes/PassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
Result.needCanonicalLoops(Enable);
} else if (ParamName == "hoist-common-insts") {
Result.hoistCommonInsts(Enable);
} else if (ParamName == "hoist-loads-stores-with-cond-faulting") {
Result.hoistLoadsStoresWithCondFaulting(Enable);
} else if (ParamName == "sink-common-insts") {
Result.sinkCommonInsts(Enable);
} else if (ParamName == "speculate-unpredictables") {
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1531,9 +1531,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,

// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
.convertSwitchRangeToICmp(true)
.speculateUnpredictables(true)));
OptimizePM.addPass(
SimplifyCFGPass(SimplifyCFGOptions()
.convertSwitchRangeToICmp(true)
.speculateUnpredictables(true)
.hoistLoadsStoresWithCondFaulting(true)));

// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ static cl::opt<bool> UserHoistCommonInsts(
"hoist-common-insts", cl::Hidden, cl::init(false),
cl::desc("hoist common instructions (default = false)"));

static cl::opt<bool> UserHoistLoadsStoresWithCondFaulting(
"hoist-loads-stores-with-cond-faulting", cl::Hidden, cl::init(false),
cl::desc("Hoist loads/stores if the target supports conditional faulting "
"(default = false)"));

static cl::opt<bool> UserSinkCommonInsts(
"sink-common-insts", cl::Hidden, cl::init(false),
cl::desc("Sink common instructions (default = false)"));
Expand Down Expand Up @@ -326,6 +331,9 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
Options.NeedCanonicalLoop = UserKeepLoops;
if (UserHoistCommonInsts.getNumOccurrences())
Options.HoistCommonInsts = UserHoistCommonInsts;
if (UserHoistLoadsStoresWithCondFaulting.getNumOccurrences())
Options.HoistLoadsStoresWithCondFaulting =
UserHoistLoadsStoresWithCondFaulting;
if (UserSinkCommonInsts.getNumOccurrences())
Options.SinkCommonInsts = UserSinkCommonInsts;
if (UserSpeculateUnpredictables.getNumOccurrences())
Expand Down Expand Up @@ -354,6 +362,8 @@ void SimplifyCFGPass::printPipeline(
<< "switch-to-lookup;";
OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
OS << (Options.HoistLoadsStoresWithCondFaulting ? "" : "no-")
<< "hoist-loads-stores-with-cond-faulting;";
OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts;";
OS << (Options.SpeculateBlocks ? "" : "no-") << "speculate-blocks;";
OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch;";
Expand Down
164 changes: 155 additions & 9 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,18 @@ static cl::opt<bool>
HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
cl::desc("Hoist common instructions up to the parent block"));

static cl::opt<bool> HoistLoadsStoresWithCondFaulting(
"simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden,
cl::init(true),
cl::desc("Hoist loads/stores if the target supports "
"conditional faulting"));

static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
"hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
cl::desc("Control the maximal conditonal load/store that we are willing "
"to speculatively execute to eliminate conditional branch "
"(default = 6)"));

static cl::opt<unsigned>
HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
cl::init(20),
Expand Down Expand Up @@ -2978,6 +2990,25 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
return BIEndProb < Likely;
}

static bool isSafeCheapLoadStore(const Instruction *I,
const TargetTransformInfo &TTI) {
// Not handle volatile or atomic.
if (auto *L = dyn_cast<LoadInst>(I)) {
if (!L->isSimple())
return false;
} else if (auto *S = dyn_cast<StoreInst>(I)) {
if (!S->isSimple())
return false;
} else
return false;

// llvm.masked.load/store use i32 for alignment while load/store use i64.
// That's why we have the alignment limitation.
// FIXME: Update the prototype of the intrinsics?
return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
getLoadStoreAlignment(I) < Value::MaximumAlignment;
}

/// Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
Expand Down Expand Up @@ -3052,6 +3083,9 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;

unsigned SpeculatedInstructions = 0;
bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
Options.HoistLoadsStoresWithCondFaulting;
SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
Value *SpeculatedStoreValue = nullptr;
StoreInst *SpeculatedStore = nullptr;
EphemeralValueTracker EphTracker;
Expand Down Expand Up @@ -3080,22 +3114,33 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,

// Only speculatively execute a single instruction (not counting the
// terminator) for now.
++SpeculatedInstructions;
bool IsSafeCheapLoadStore = HoistLoadsStores &&
isSafeCheapLoadStore(&I, TTI) &&
SpeculatedConditionalLoadsStores.size() <
HoistLoadsStoresWithCondFaultingThreshold;
// Not count load/store into cost if target supports conditional faulting
// b/c it's cheap to speculate it.
if (IsSafeCheapLoadStore)
SpeculatedConditionalLoadsStores.push_back(&I);
else
++SpeculatedInstructions;

if (SpeculatedInstructions > 1)
return false;

// Don't hoist the instruction if it's unsafe or expensive.
if (!isSafeToSpeculativelyExecute(&I) &&
!(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
&I, BB, ThenBB, EndBB))))
if (!IsSafeCheapLoadStore && !isSafeToSpeculativelyExecute(&I) &&
!(HoistCondStores && !SpeculatedStoreValue &&
(SpeculatedStoreValue =
isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
return false;
if (!SpeculatedStoreValue &&
if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
computeSpeculationCost(&I, TTI) >
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
return false;

// Store the store speculation candidate.
if (SpeculatedStoreValue)
if (!SpeculatedStore && SpeculatedStoreValue)
SpeculatedStore = cast<StoreInst>(&I);

// Do not hoist the instruction if any of its operands are defined but not
Expand All @@ -3122,11 +3167,11 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,

// Check that we can insert the selects and that it's not too expensive to do
// so.
bool Convert = SpeculatedStore != nullptr;
bool Convert =
SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
InstructionCost Cost = 0;
Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
SpeculatedInstructions,
Cost, TTI);
SpeculatedInstructions, Cost, TTI);
if (!Convert || Cost > Budget)
return false;

Expand Down Expand Up @@ -3214,6 +3259,107 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
std::prev(ThenBB->end()));

// If the target supports conditional faulting,
// we look for the following pattern:
// \code
// BB:
// ...
// %cond = icmp ult %x, %y
// br i1 %cond, label %TrueBB, label %FalseBB
// FalseBB:
// store i32 1, ptr %q, align 4
// ...
// TrueBB:
// %maskedloadstore = load i32, ptr %b, align 4
// store i32 %maskedloadstore, ptr %p, align 4
// ...
// \endcode
//
// and transform it into:
//
// \code
// BB:
// ...
// %cond = icmp ult %x, %y
// %maskedloadstore = cload i32, ptr %b, %cond
// cstore i32 %maskedloadstore, ptr %p, %cond
// cstore i32 1, ptr %q, ~%cond
// br i1 %cond, label %TrueBB, label %FalseBB
// FalseBB:
// ...
// TrueBB:
// ...
// \endcode
//
// where cload/cstore are represented by llvm.masked.load/store intrinsics,
// e.g.
//
// \code
// %vcond = bitcast i1 %cond to <1 x i1>
// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
// %maskedloadstore = bitcast <1 x i32> %v0 to i32
// call void @llvm.masked.store.v1i32.p0
// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
// %cond.not = xor i1 %cond, true
// %vcond.not = bitcast i1 %cond.not to <1 x i>
// call void @llvm.masked.store.v1i32.p0
// (<1 x i32> <i32 1>, ptr %q, i32 4, <1x i1> %vcond.not)
// \endcode
//
// So we need to turn hoisted load/store into cload/cstore.
auto &Context = BI->getParent()->getContext();
auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
auto *Cond = BI->getOperand(0);
Value *Mask = nullptr;
// Construct the condition if needed.
if (!SpeculatedConditionalLoadsStores.empty()) {
IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
Mask = Builder.CreateBitCast(
Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
VCondTy);
}
for (auto *I : SpeculatedConditionalLoadsStores) {
IRBuilder<> Builder(I);
// We currently assume conditional faulting load/store is supported for
// scalar types only when creating new instructions. This can be easily
// extended for vector types in the future.
assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
auto *Op0 = I->getOperand(0);
Instruction *MaskedLoadStore = nullptr;
if (auto *LI = dyn_cast<LoadInst>(I)) {
// Handle Load.
auto *Ty = I->getType();
MaskedLoadStore = Builder.CreateMaskedLoad(FixedVectorType::get(Ty, 1),
Op0, LI->getAlign(), Mask);
I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
} else {
// Handle Store.
auto *StoredVal =
Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
MaskedLoadStore = Builder.CreateMaskedStore(
StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
}
// For non-debug metadata, only !annotation, !range, !nonnull and !align are
// kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
//
// !nonnull, !align : Not support pointer type, no need to keep.
// !range: Load type is changed from scalar to vector, but the metadata on
// vector specifies a per-element range, so the semantics stay the
// same. Keep it.
// !annotation: Not impact semantics. Keep it.
I->dropUBImplyingAttrsAndUnknownMetadata(
{LLVMContext::MD_range, LLVMContext::MD_annotation});
// FIXME: DIAssignID is not supported for masked store yet.
// (Verifier::visitDIAssignIDMetadata)
at::deleteAssignmentMarkers(I);
I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
return Node->getMetadataID() == Metadata::DIAssignIDKind;
});
MaskedLoadStore->copyMetadata(*I);
I->eraseFromParent();
}

// Insert selects and rewrite the PHI operands.
IRBuilder<NoFolder> Builder(BI);
for (PHINode &PN : EndBB->phis()) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Other/new-pm-print-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(print<stack-lifetime><may>,print<stack-lifetime><must>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-17
; CHECK-17: function(print<stack-lifetime><may>,print<stack-lifetime><must>)

; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)

; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only>,loop-vectorize<interleave-forced-only;vectorize-forced-only>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-19
; CHECK-19: function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,loop-vectorize<interleave-forced-only;vectorize-forced-only;>)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -O1 -S | FileCheck %s

;; Test masked.load/store.v1* is generated in simplifycfg and not falls back to branch+load/store in following passes.
define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: @basic(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
; CHECK-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
; CHECK-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false

if.false:
br label %if.end

if.true:
%pv = load i16, ptr %p, align 2
%qv = load i32, ptr %q, align 4
%bv = load i64, ptr %b, align 8
store i16 %pv, ptr %b, align 2
store i32 %qv, ptr %p, align 4
store i64 %bv, ptr %q, align 8
br label %if.false

if.end:
ret void
}
Loading