@@ -1662,21 +1662,43 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
1662
1662
// / \endcode
1663
1663
// /
1664
1664
// / So we need to turn hoisted load/store into cload/cstore.
1665
+ // /
1666
+ // / \param BI The branch instruction.
1667
+ // / \param SpeculatedConditionalLoadsStores The load/store instructions that
1668
+ // / will be speculated.
1669
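+ // / \param Invert If set, indicates whether FalseBB is the block being speculated (only used in triangle CFG); if std::nullopt, the loads/stores of both successors of \p BI are hoisted, each masked by the condition of its own successor.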
1665
1670
static void hoistConditionalLoadsStores (
1666
1671
BranchInst *BI,
1667
1672
SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1668
- bool Invert) {
1673
+ std::optional< bool > Invert) {
1669
1674
auto &Context = BI->getParent ()->getContext ();
1670
1675
auto *VCondTy = FixedVectorType::get (Type::getInt1Ty (Context), 1 );
1671
1676
auto *Cond = BI->getOperand (0 );
1672
1677
// Construct the condition if needed.
1673
1678
BasicBlock *BB = BI->getParent ();
1674
- IRBuilder<> Builder (SpeculatedConditionalLoadsStores.back ());
1675
- Value *Mask = Builder.CreateBitCast (
1676
- Invert ? Builder.CreateXor (Cond, ConstantInt::getTrue (Context)) : Cond,
1677
- VCondTy);
1679
+ IRBuilder<> Builder (
1680
+ Invert.has_value () ? SpeculatedConditionalLoadsStores.back () : BI);
1681
+ Value *Mask = nullptr ;
1682
+ Value *MaskFalse = nullptr ;
1683
+ Value *MaskTrue = nullptr ;
1684
+ if (Invert.has_value ()) {
1685
+ Mask = Builder.CreateBitCast (
1686
+ *Invert ? Builder.CreateXor (Cond, ConstantInt::getTrue (Context)) : Cond,
1687
+ VCondTy);
1688
+ } else {
1689
+ MaskFalse = Builder.CreateBitCast (
1690
+ Builder.CreateXor (Cond, ConstantInt::getTrue (Context)), VCondTy);
1691
+ MaskTrue = Builder.CreateBitCast (Cond, VCondTy);
1692
+ }
1693
+ auto PeekThroughBitcasts = [](Value *V) {
1694
+ while (auto *BitCast = dyn_cast<BitCastInst>(V))
1695
+ V = BitCast->getOperand (0 );
1696
+ return V;
1697
+ };
1678
1698
for (auto *I : SpeculatedConditionalLoadsStores) {
1679
- IRBuilder<> Builder (I);
1699
+ IRBuilder<> Builder (Invert.has_value () ? I : BI);
1700
+ if (!Invert.has_value ())
1701
+ Mask = I->getParent () == BI->getSuccessor (0 ) ? MaskTrue : MaskFalse;
1680
1702
// We currently assume conditional faulting load/store is supported for
1681
1703
// scalar types only when creating new instructions. This can be easily
1682
1704
// extended for vector types in the future.
@@ -1688,12 +1710,14 @@ static void hoistConditionalLoadsStores(
1688
1710
auto *Ty = I->getType ();
1689
1711
PHINode *PN = nullptr ;
1690
1712
Value *PassThru = nullptr ;
1691
- for (User *U : I->users ())
1692
- if ((PN = dyn_cast<PHINode>(U))) {
1693
- PassThru = Builder.CreateBitCast (PN->getIncomingValueForBlock (BB),
1694
- FixedVectorType::get (Ty, 1 ));
1695
- break ;
1696
- }
1713
+ if (Invert.has_value ())
1714
+ for (User *U : I->users ())
1715
+ if ((PN = dyn_cast<PHINode>(U))) {
1716
+ PassThru = Builder.CreateBitCast (
1717
+ PeekThroughBitcasts (PN->getIncomingValueForBlock (BB)),
1718
+ FixedVectorType::get (Ty, 1 ));
1719
+ break ;
1720
+ }
1697
1721
MaskedLoadStore = Builder.CreateMaskedLoad (
1698
1722
FixedVectorType::get (Ty, 1 ), Op0, LI->getAlign (), Mask, PassThru);
1699
1723
Value *NewLoadStore = Builder.CreateBitCast (MaskedLoadStore, Ty);
@@ -1702,8 +1726,8 @@ static void hoistConditionalLoadsStores(
1702
1726
I->replaceAllUsesWith (NewLoadStore);
1703
1727
} else {
1704
1728
// Handle Store.
1705
- auto *StoredVal =
1706
- Builder. CreateBitCast (Op0, FixedVectorType::get (Op0->getType (), 1 ));
1729
+ auto *StoredVal = Builder. CreateBitCast (
1730
+ PeekThroughBitcasts (Op0) , FixedVectorType::get (Op0->getType (), 1 ));
1707
1731
MaskedLoadStore = Builder.CreateMaskedStore (
1708
1732
StoredVal, I->getOperand (1 ), cast<StoreInst>(I)->getAlign (), Mask);
1709
1733
}
@@ -3155,7 +3179,8 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
3155
3179
return HaveRewritablePHIs;
3156
3180
}
3157
3181
3158
- static bool isProfitableToSpeculate (const BranchInst *BI, bool Invert,
3182
+ static bool isProfitableToSpeculate (const BranchInst *BI,
3183
+ std::optional<bool > Invert,
3159
3184
const TargetTransformInfo &TTI) {
3160
3185
// If the branch is non-unpredictable, and is predicted to *not* branch to
3161
3186
// the `then` block, then avoid speculating it.
@@ -3166,7 +3191,10 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
3166
3191
if (!extractBranchWeights (*BI, TWeight, FWeight) || (TWeight + FWeight) == 0 )
3167
3192
return true ;
3168
3193
3169
- uint64_t EndWeight = Invert ? TWeight : FWeight;
3194
+ if (!Invert.has_value ())
3195
+ return false ;
3196
+
3197
+ uint64_t EndWeight = *Invert ? TWeight : FWeight;
3170
3198
BranchProbability BIEndProb =
3171
3199
BranchProbability::getBranchProbability (EndWeight, TWeight + FWeight);
3172
3200
BranchProbability Likely = TTI.getPredictableBranchThreshold ();
@@ -8034,6 +8062,35 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
8034
8062
if (HoistCommon &&
8035
8063
hoistCommonCodeFromSuccessors (BI, !Options.HoistCommonInsts ))
8036
8064
return requestResimplify ();
8065
+
8066
+ if (BI && HoistLoadsStoresWithCondFaulting &&
8067
+ Options.HoistLoadsStoresWithCondFaulting &&
8068
+ isProfitableToSpeculate (BI, std::nullopt, TTI)) {
8069
+ SmallVector<Instruction *, 2 > SpeculatedConditionalLoadsStores;
8070
+ auto CanSpeculateConditionalLoadsStores = [&]() {
8071
+ for (auto *Succ : successors (BB)) {
8072
+ for (Instruction &I : *Succ) {
8073
+ if (I.isTerminator ()) {
8074
+ if (I.getNumSuccessors () > 1 )
8075
+ return false ;
8076
+ continue ;
8077
+ } else if (!isSafeCheapLoadStore (&I, TTI) ||
8078
+ SpeculatedConditionalLoadsStores.size () ==
8079
+ HoistLoadsStoresWithCondFaultingThreshold) {
8080
+ return false ;
8081
+ }
8082
+ SpeculatedConditionalLoadsStores.push_back (&I);
8083
+ }
8084
+ }
8085
+ return !SpeculatedConditionalLoadsStores.empty ();
8086
+ };
8087
+
8088
+ if (CanSpeculateConditionalLoadsStores ()) {
8089
+ hoistConditionalLoadsStores (BI, SpeculatedConditionalLoadsStores,
8090
+ std::nullopt);
8091
+ return requestResimplify ();
8092
+ }
8093
+ }
8037
8094
} else {
8038
8095
// If Successor #1 has multiple preds, we may be able to conditionally
8039
8096
// execute Successor #0 if it branches to Successor #1.
0 commit comments