Skip to content

Commit 3a4796c

Browse files
committed
[X86,SimplifyCFG] Support hoisting load/store with conditional faulting (Part II)
This is a follow up of #96878 to support hoisting load/store for diamond CFG. ``` void test (int a, int *c, int *d) { if (a) *c = a; else *d = a; } ```
1 parent f427028 commit 3a4796c

File tree

2 files changed

+118
-35
lines changed

2 files changed

+118
-35
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 82 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ class SimplifyCFGOpt {
283283
bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
284284
IRBuilder<> &Builder);
285285

286-
bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
286+
bool hoistCommonCodeFromSuccessors(Instruction *TI, bool EqTermsOnly);
287287
bool hoistSuccIdenticalTerminatorToSwitchOrIf(
288288
Instruction *TI, Instruction *I1,
289289
SmallVectorImpl<Instruction *> &OtherSuccTIs);
@@ -1615,19 +1615,39 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
16151615
return false;
16161616
}
16171617

1618+
static bool isSafeCheapLoadStore(const Instruction *I,
1619+
const TargetTransformInfo &TTI) {
1620+
// Not handle volatile or atomic.
1621+
if (auto *L = dyn_cast<LoadInst>(I)) {
1622+
if (!L->isSimple())
1623+
return false;
1624+
} else if (auto *S = dyn_cast<StoreInst>(I)) {
1625+
if (!S->isSimple())
1626+
return false;
1627+
} else
1628+
return false;
1629+
1630+
// llvm.masked.load/store use i32 for alignment while load/store use i64.
1631+
// That's why we have the alignment limitation.
1632+
// FIXME: Update the prototype of the intrinsics?
1633+
return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
1634+
getLoadStoreAlignment(I) < Value::MaximumAlignment;
1635+
}
1636+
16181637
/// Hoist any common code in the successor blocks up into the block. This
16191638
/// function guarantees that BB dominates all successors. If EqTermsOnly is
16201639
/// given, only perform hoisting in case both blocks only contain a terminator.
16211640
/// In that case, only the original BI will be replaced and selects for PHIs are
16221641
/// added.
1623-
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
1642+
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
16241643
bool EqTermsOnly) {
16251644
// This does very trivial matching, with limited scanning, to find identical
16261645
// instructions in the two blocks. In particular, we don't want to get into
16271646
// O(N1*N2*...) situations here where Ni are the sizes of these successors. As
16281647
// such, we currently just scan for obviously identical instructions in an
16291648
// identical order, possibly separated by the same number of non-identical
16301649
// instructions.
1650+
BasicBlock *BB = TI->getParent();
16311651
unsigned int SuccSize = succ_size(BB);
16321652
if (SuccSize < 2)
16331653
return false;
@@ -1639,7 +1659,63 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
16391659
if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
16401660
return false;
16411661

1642-
auto *TI = BB->getTerminator();
1662+
auto *BI = dyn_cast<BranchInst>(TI);
1663+
if (BI && HoistLoadsStoresWithCondFaulting &&
1664+
Options.HoistLoadsStoresWithCondFaulting) {
1665+
SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
1666+
for (auto *Succ : successors(BB)) {
1667+
for (Instruction &I : drop_end(*Succ)) {
1668+
if (!isSafeCheapLoadStore(&I, TTI) ||
1669+
SpeculatedConditionalLoadsStores.size() ==
1670+
HoistLoadsStoresWithCondFaultingThreshold)
1671+
return false;
1672+
SpeculatedConditionalLoadsStores.push_back(&I);
1673+
}
1674+
}
1675+
1676+
// TODO: Move below code to a function to share with #96878.
1677+
if (SpeculatedConditionalLoadsStores.empty())
1678+
return false;
1679+
1680+
auto &Context = BI->getParent()->getContext();
1681+
auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1682+
auto *Cond = BI->getOperand(0);
1683+
IRBuilder<> Builder(BI);
1684+
Value *Mask1 = Builder.CreateBitCast(Cond, VCondTy);
1685+
Value *Mask0 = Builder.CreateBitCast(
1686+
Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1687+
for (auto *I : SpeculatedConditionalLoadsStores) {
1688+
Value *Mask = I->getParent() == BI->getSuccessor(0) ? Mask1 : Mask0;
1689+
assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1690+
auto *Op0 = I->getOperand(0);
1691+
Instruction *MaskedLoadStore = nullptr;
1692+
if (auto *LI = dyn_cast<LoadInst>(I)) {
1693+
// Handle Load.
1694+
auto *Ty = I->getType();
1695+
MaskedLoadStore = Builder.CreateMaskedLoad(FixedVectorType::get(Ty, 1),
1696+
Op0, LI->getAlign(), Mask);
1697+
I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
1698+
} else {
1699+
// Handle Store.
1700+
auto *StoredVal =
1701+
Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
1702+
MaskedLoadStore = Builder.CreateMaskedStore(
1703+
StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1704+
}
1705+
I->dropUBImplyingAttrsAndUnknownMetadata(
1706+
{LLVMContext::MD_range, LLVMContext::MD_annotation});
1707+
// FIXME: DIAssignID is not supported for masked store yet.
1708+
// (Verifier::visitDIAssignIDMetadata)
1709+
at::deleteAssignmentMarkers(I);
1710+
I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1711+
return Node->getMetadataID() == Metadata::DIAssignIDKind;
1712+
});
1713+
MaskedLoadStore->copyMetadata(*I);
1714+
I->eraseFromParent();
1715+
}
1716+
1717+
return true;
1718+
}
16431719

16441720
// The second of pair is a SkipFlags bitmask.
16451721
using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
@@ -2998,25 +3074,6 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
29983074
return BIEndProb < Likely;
29993075
}
30003076

3001-
static bool isSafeCheapLoadStore(const Instruction *I,
3002-
const TargetTransformInfo &TTI) {
3003-
// Not handle volatile or atomic.
3004-
if (auto *L = dyn_cast<LoadInst>(I)) {
3005-
if (!L->isSimple())
3006-
return false;
3007-
} else if (auto *S = dyn_cast<StoreInst>(I)) {
3008-
if (!S->isSimple())
3009-
return false;
3010-
} else
3011-
return false;
3012-
3013-
// llvm.masked.load/store use i32 for alignment while load/store use i64.
3014-
// That's why we have the alignment limitation.
3015-
// FIXME: Update the prototype of the intrinsics?
3016-
return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
3017-
getLoadStoreAlignment(I) < Value::MaximumAlignment;
3018-
}
3019-
30203077
/// Speculate a conditional basic block flattening the CFG.
30213078
///
30223079
/// Note that this is a very risky transform currently. Speculating
@@ -7436,7 +7493,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
74367493
return requestResimplify();
74377494

74387495
if (HoistCommon &&
7439-
hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
7496+
hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
74407497
return requestResimplify();
74417498

74427499
return false;
@@ -7794,8 +7851,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
77947851
// can hoist it up to the branching block.
77957852
if (BI->getSuccessor(0)->getSinglePredecessor()) {
77967853
if (BI->getSuccessor(1)->getSinglePredecessor()) {
7797-
if (HoistCommon && hoistCommonCodeFromSuccessors(
7798-
BI->getParent(), !Options.HoistCommonInsts))
7854+
if (HoistCommon &&
7855+
hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
77997856
return requestResimplify();
78007857
} else {
78017858
// If Successor #1 has multiple preds, we may be able to conditionally

llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -278,21 +278,19 @@ if.false: ; preds = %if.true, %entry
278278
}
279279

280280
;; Both of successor 0 and successor 1 have a single predecessor.
281-
;; TODO: Support transform for this case.
282281
define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
283282
; CHECK-LABEL: @single_predecessor(
284283
; CHECK-NEXT: entry:
285284
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
286-
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
287-
; CHECK: common.ret:
285+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
286+
; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[TOBOOL]], true
287+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TMP1]] to <1 x i1>
288+
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]])
289+
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP2]], <1 x i32> poison)
290+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
291+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
292+
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP2]])
288293
; CHECK-NEXT: ret void
289-
; CHECK: if.end:
290-
; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
291-
; CHECK-NEXT: br label [[COMMON_RET:%.*]]
292-
; CHECK: if.then:
293-
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q]], align 4
294-
; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
295-
; CHECK-NEXT: br label [[COMMON_RET]]
296294
;
297295
entry:
298296
%tobool = icmp ne i32 %a, 0
@@ -674,6 +672,34 @@ if.false:
674672
ret void
675673
}
676674

675+
define void @diamondCFG(i32 %a, ptr %c, ptr %d) {
676+
; CHECK-LABEL: @diamondCFG(
677+
; CHECK-NEXT: entry:
678+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
679+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[TOBOOL_NOT]] to <1 x i1>
680+
; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[TOBOOL_NOT]], true
681+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TMP1]] to <1 x i1>
682+
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[D:%.*]], i32 4, <1 x i1> [[TMP0]])
683+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[A]] to <1 x i32>
684+
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[C:%.*]], i32 4, <1 x i1> [[TMP2]])
685+
; CHECK-NEXT: ret void
686+
;
687+
entry:
688+
%tobool.not = icmp eq i32 %a, 0
689+
br i1 %tobool.not, label %if.else, label %if.then
690+
691+
if.then: ; preds = %entry
692+
store i32 %a, ptr %c, align 4
693+
br label %if.end
694+
695+
if.else: ; preds = %entry
696+
store i32 0, ptr %d, align 4
697+
br label %if.end
698+
699+
if.end: ; preds = %if.else, %if.then
700+
ret void
701+
}
702+
677703
declare i32 @read_memory_only() readonly nounwind willreturn speculatable
678704

679705
!llvm.dbg.cu = !{!0}

0 commit comments

Comments
 (0)