@@ -283,7 +283,7 @@ class SimplifyCFGOpt {
   bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                              IRBuilder<> &Builder);
 
-  bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
+  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool EqTermsOnly);
   bool hoistSuccIdenticalTerminatorToSwitchOrIf(
       Instruction *TI, Instruction *I1,
       SmallVectorImpl<Instruction *> &OtherSuccTIs);
@@ -1615,19 +1615,39 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
   return false;
 }
 
+static bool isSafeCheapLoadStore(const Instruction *I,
+                                 const TargetTransformInfo &TTI) {
+  // Not handle volatile or atomic.
+  if (auto *L = dyn_cast<LoadInst>(I)) {
+    if (!L->isSimple())
+      return false;
+  } else if (auto *S = dyn_cast<StoreInst>(I)) {
+    if (!S->isSimple())
+      return false;
+  } else
+    return false;
+
+  // llvm.masked.load/store use i32 for alignment while load/store use i64.
+  // That's why we have the alignment limitation.
+  // FIXME: Update the prototype of the intrinsics?
+  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
+         getLoadStoreAlignment(I) < Value::MaximumAlignment;
+}
+
 /// Hoist any common code in the successor blocks up into the block. This
 /// function guarantees that BB dominates all successors. If EqTermsOnly is
 /// given, only perform hoisting in case both blocks only contain a terminator.
 /// In that case, only the original BI will be replaced and selects for PHIs are
 /// added.
-bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
+bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                    bool EqTermsOnly) {
   // This does very trivial matching, with limited scanning, to find identical
   // instructions in the two blocks. In particular, we don't want to get into
   // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
   // such, we currently just scan for obviously identical instructions in an
   // identical order, possibly separated by the same number of non-identical
   // instructions.
+  BasicBlock *BB = TI->getParent();
   unsigned int SuccSize = succ_size(BB);
   if (SuccSize < 2)
     return false;
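
For context on the new predicate's alignment guard (an illustration, not part of the commit): a "simple" load or store is one that is neither volatile nor atomic, which is the first thing isSafeCheapLoadStore screens for. The alignment limit follows from the masked intrinsics' signatures, where alignment is an i32 immediate, while a plain load/store can carry an alignment as large as Value::MaximumAlignment (1 << 32), which does not fit; hence the strict less-than check and the FIXME. The LangRef form of the intrinsics, instantiated for a hypothetical single-element i32 case, looks like:

    declare <1 x i32> @llvm.masked.load.v1i32.p0(ptr, i32 immarg, <1 x i1>, <1 x i32>)
    declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32 immarg, <1 x i1>)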
@@ -1639,7 +1659,63 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
     if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
       return false;
 
-  auto *TI = BB->getTerminator();
+  auto *BI = dyn_cast<BranchInst>(TI);
+  if (BI && HoistLoadsStoresWithCondFaulting &&
+      Options.HoistLoadsStoresWithCondFaulting) {
+    SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
+    for (auto *Succ : successors(BB)) {
+      for (Instruction &I : drop_end(*Succ)) {
+        if (!isSafeCheapLoadStore(&I, TTI) ||
+            SpeculatedConditionalLoadsStores.size() ==
+                HoistLoadsStoresWithCondFaultingThreshold)
+          return false;
+        SpeculatedConditionalLoadsStores.push_back(&I);
+      }
+    }
+
+    // TODO: Move below code to a function to share with #96878.
+    if (SpeculatedConditionalLoadsStores.empty())
+      return false;
+
+    auto &Context = BI->getParent()->getContext();
+    auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
+    auto *Cond = BI->getOperand(0);
+    IRBuilder<> Builder(BI);
+    Value *Mask1 = Builder.CreateBitCast(Cond, VCondTy);
+    Value *Mask0 = Builder.CreateBitCast(
+        Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
+    for (auto *I : SpeculatedConditionalLoadsStores) {
+      Value *Mask = I->getParent() == BI->getSuccessor(0) ? Mask1 : Mask0;
+      assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
+      auto *Op0 = I->getOperand(0);
+      Instruction *MaskedLoadStore = nullptr;
+      if (auto *LI = dyn_cast<LoadInst>(I)) {
+        // Handle Load.
+        auto *Ty = I->getType();
+        MaskedLoadStore = Builder.CreateMaskedLoad(FixedVectorType::get(Ty, 1),
+                                                   Op0, LI->getAlign(), Mask);
+        I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
+      } else {
+        // Handle Store.
+        auto *StoredVal =
+            Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
+        MaskedLoadStore = Builder.CreateMaskedStore(
+            StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
+      }
+      I->dropUBImplyingAttrsAndUnknownMetadata(
+          {LLVMContext::MD_range, LLVMContext::MD_annotation});
+      // FIXME: DIAssignID is not supported for masked store yet.
+      // (Verifier::visitDIAssignIDMetadata)
+      at::deleteAssignmentMarkers(I);
+      I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
+        return Node->getMetadataID() == Metadata::DIAssignIDKind;
+      });
+      MaskedLoadStore->copyMetadata(*I);
+      I->eraseFromParent();
+    }
+
+    return true;
+  }
 
   // The second of pair is a SkipFlags bitmask.
   using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
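
To make the new path concrete, here is a minimal before/after IR sketch. It is hand-written rather than taken from the commit's tests, and it assumes the target reports hasConditionalLoadStoreForType for i32 (the motivating target in #96878 is X86 with APX conditional-faulting instructions); all value and block names are illustrative. Note that drop_end skips only the terminator, so every other instruction in both successors must pass isSafeCheapLoadStore for the transform to fire.

Before, with a load in one successor and a store in the other:

      br i1 %cond, label %if.then, label %if.else
    if.then:
      %v = load i32, ptr %p, align 4
      br label %if.end
    if.else:
      store i32 %x, ptr %q, align 4
      br label %if.end

After hoisting, the accesses become masked operations inserted ahead of the branch; Mask1 guards successor 0 and Mask0 is its complement, and the later cleanup of the now-trivial CFG is elided here:

      %mask1 = bitcast i1 %cond to <1 x i1>
      %cond.not = xor i1 %cond, true
      %mask0 = bitcast i1 %cond.not to <1 x i1>
      %v.vec = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %p, i32 4, <1 x i1> %mask1, <1 x i32> poison)
      %v = bitcast <1 x i32> %v.vec to i32
      %x.vec = bitcast i32 %x to <1 x i32>
      call void @llvm.masked.store.v1i32.p0(<1 x i32> %x.vec, ptr %q, i32 4, <1 x i1> %mask0)
      br i1 %cond, label %if.then, label %if.else

The scalar-to-<1 x T> bitcasts mirror the CreateBitCast calls above: the masked intrinsics are vector operations, so a single scalar access is wrapped in a one-element vector.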
@@ -2998,25 +3074,6 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
   return BIEndProb < Likely;
 }
 
-static bool isSafeCheapLoadStore(const Instruction *I,
-                                 const TargetTransformInfo &TTI) {
-  // Not handle volatile or atomic.
-  if (auto *L = dyn_cast<LoadInst>(I)) {
-    if (!L->isSimple())
-      return false;
-  } else if (auto *S = dyn_cast<StoreInst>(I)) {
-    if (!S->isSimple())
-      return false;
-  } else
-    return false;
-
-  // llvm.masked.load/store use i32 for alignment while load/store use i64.
-  // That's why we have the alignment limitation.
-  // FIXME: Update the prototype of the intrinsics?
-  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
-         getLoadStoreAlignment(I) < Value::MaximumAlignment;
-}
-
 /// Speculate a conditional basic block flattening the CFG.
 ///
 /// Note that this is a very risky transform currently. Speculating
@@ -7436,7 +7493,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
     return requestResimplify();
 
   if (HoistCommon &&
-      hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
+      hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
     return requestResimplify();
 
   return false;
@@ -7794,8 +7851,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   // can hoist it up to the branching block.
   if (BI->getSuccessor(0)->getSinglePredecessor()) {
     if (BI->getSuccessor(1)->getSinglePredecessor()) {
-      if (HoistCommon && hoistCommonCodeFromSuccessors(
-                             BI->getParent(), !Options.HoistCommonInsts))
+      if (HoistCommon &&
+          hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
         return requestResimplify();
     } else {
       // If Successor #1 has multiple preds, we may be able to conditionally