@@ -283,7 +283,7 @@ class SimplifyCFGOpt {
   bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                              IRBuilder<> &Builder);

-  bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
+  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool EqTermsOnly);
   bool hoistSuccIdenticalTerminatorToSwitchOrIf(
       Instruction *TI, Instruction *I1,
       SmallVectorImpl<Instruction *> &OtherSuccTIs);
@@ -1611,19 +1611,155 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
   return false;
 }

+/// If the target supports conditional faulting,
+/// we look for the following pattern:
+/// \code
+///   BB:
+///     ...
+///     %cond = icmp ult %x, %y
+///     br i1 %cond, label %TrueBB, label %FalseBB
+///   FalseBB:
+///     store i32 1, ptr %q, align 4
+///     ...
+///   TrueBB:
+///     %maskedloadstore = load i32, ptr %b, align 4
+///     store i32 %maskedloadstore, ptr %p, align 4
+///     ...
+/// \endcode
+///
+/// and transform it into:
+///
+/// \code
+///   BB:
+///     ...
+///     %cond = icmp ult %x, %y
+///     %maskedloadstore = cload i32, ptr %b, %cond
+///     cstore i32 %maskedloadstore, ptr %p, %cond
+///     cstore i32 1, ptr %q, ~%cond
+///     br i1 %cond, label %TrueBB, label %FalseBB
+///   FalseBB:
+///     ...
+///   TrueBB:
+///     ...
+/// \endcode
+///
+/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
+/// e.g.
+///
+/// \code
+///   %vcond = bitcast i1 %cond to <1 x i1>
+///   %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
+///     (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
+///   %maskedloadstore = bitcast <1 x i32> %v0 to i32
+///   call void @llvm.masked.store.v1i32.p0
+///     (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
+///   %cond.not = xor i1 %cond, true
+///   %vcond.not = bitcast i1 %cond.not to <1 x i1>
+///   call void @llvm.masked.store.v1i32.p0
+///     (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
+/// \endcode
+///
+/// So we need to turn hoisted load/store into cload/cstore.
+static void hoistConditionalLoadsStores(
+    BranchInst *BI,
+    SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
+    bool Invert) {
+  auto &Context = BI->getParent()->getContext();
+  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
+  auto *Cond = BI->getOperand(0);
+  // Construct the condition if needed.
+  BasicBlock *BB = BI->getParent();
+  IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
+  Value *Mask = Builder.CreateBitCast(
+      Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
+      VCondTy);
+  for (auto *I : SpeculatedConditionalLoadsStores) {
+    IRBuilder<> Builder(I);
+    // We currently assume conditional faulting load/store is supported for
+    // scalar types only when creating new instructions. This can be easily
+    // extended for vector types in the future.
+    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
+    auto *Op0 = I->getOperand(0);
+    CallInst *MaskedLoadStore = nullptr;
+    if (auto *LI = dyn_cast<LoadInst>(I)) {
+      // Handle Load.
+      auto *Ty = I->getType();
+      PHINode *PN = nullptr;
+      Value *PassThru = nullptr;
+      for (User *U : I->users())
+        if ((PN = dyn_cast<PHINode>(U))) {
+          PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB),
+                                           FixedVectorType::get(Ty, 1));
+          break;
+        }
+      MaskedLoadStore = Builder.CreateMaskedLoad(
+          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
+      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
+      if (PN)
+        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
+      I->replaceAllUsesWith(NewLoadStore);
+    } else {
+      // Handle Store.
+      auto *StoredVal =
+          Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
+      MaskedLoadStore = Builder.CreateMaskedStore(
+          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
+    }
+    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
+    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
+    //
+    // !nonnull, !align : Not support pointer type, no need to keep.
+    // !range: Load type is changed from scalar to vector, but the metadata on
+    //         vector specifies a per-element range, so the semantics stay the
+    //         same. Keep it.
+    // !annotation: Not impact semantics. Keep it.
+    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
+      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
+    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
+    // FIXME: DIAssignID is not supported for masked store yet.
+    // (Verifier::visitDIAssignIDMetadata)
+    at::deleteAssignmentMarkers(I);
+    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
+      return Node->getMetadataID() == Metadata::DIAssignIDKind;
+    });
+    MaskedLoadStore->copyMetadata(*I);
+    I->eraseFromParent();
+  }
+}
+
+static bool isSafeCheapLoadStore(const Instruction *I,
+                                 const TargetTransformInfo &TTI) {
+  // Not handle volatile or atomic.
+  if (auto *L = dyn_cast<LoadInst>(I)) {
+    if (!L->isSimple())
+      return false;
+  } else if (auto *S = dyn_cast<StoreInst>(I)) {
+    if (!S->isSimple())
+      return false;
+  } else
+    return false;
+
+  // llvm.masked.load/store use i32 for alignment while load/store use i64.
+  // That's why we have the alignment limitation.
+  // FIXME: Update the prototype of the intrinsics?
+  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
+         getLoadStoreAlignment(I) < Value::MaximumAlignment;
+}
+
 /// Hoist any common code in the successor blocks up into the block. This
 /// function guarantees that BB dominates all successors. If EqTermsOnly is
 /// given, only perform hoisting in case both blocks only contain a terminator.
 /// In that case, only the original BI will be replaced and selects for PHIs are
 /// added.
-bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
+bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                    bool EqTermsOnly) {
   // This does very trivial matching, with limited scanning, to find identical
   // instructions in the two blocks. In particular, we don't want to get into
   // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
   // such, we currently just scan for obviously identical instructions in an
   // identical order, possibly separated by the same number of non-identical
   // instructions.
+  BasicBlock *BB = TI->getParent();
   unsigned int SuccSize = succ_size(BB);
   if (SuccSize < 2)
     return false;
@@ -1635,8 +1771,6 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
     if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
       return false;

-  auto *TI = BB->getTerminator();
-
   // The second of pair is a SkipFlags bitmask.
   using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
   SmallVector<SuccIterPair, 8> SuccIterPairs;
@@ -2997,25 +3131,6 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
   return BIEndProb < Likely;
 }

-static bool isSafeCheapLoadStore(const Instruction *I,
-                                 const TargetTransformInfo &TTI) {
-  // Not handle volatile or atomic.
-  if (auto *L = dyn_cast<LoadInst>(I)) {
-    if (!L->isSimple())
-      return false;
-  } else if (auto *S = dyn_cast<StoreInst>(I)) {
-    if (!S->isSimple())
-      return false;
-  } else
-    return false;
-
-  // llvm.masked.load/store use i32 for alignment while load/store use i64.
-  // That's why we have the alignment limitation.
-  // FIXME: Update the prototype of the intrinsics?
-  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
-         getLoadStoreAlignment(I) < Value::MaximumAlignment;
-}
-
 /// Speculate a conditional basic block flattening the CFG.
 ///
 /// Note that this is a very risky transform currently. Speculating
@@ -3267,118 +3382,8 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
   BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
              std::prev(ThenBB->end()));

-  // If the target supports conditional faulting,
-  // we look for the following pattern:
-  // \code
-  //   BB:
-  //     ...
-  //     %cond = icmp ult %x, %y
-  //     br i1 %cond, label %TrueBB, label %FalseBB
-  //   FalseBB:
-  //     store i32 1, ptr %q, align 4
-  //     ...
-  //   TrueBB:
-  //     %maskedloadstore = load i32, ptr %b, align 4
-  //     store i32 %maskedloadstore, ptr %p, align 4
-  //     ...
-  // \endcode
-  //
-  // and transform it into:
-  //
-  // \code
-  //   BB:
-  //     ...
-  //     %cond = icmp ult %x, %y
-  //     %maskedloadstore = cload i32, ptr %b, %cond
-  //     cstore i32 %maskedloadstore, ptr %p, %cond
-  //     cstore i32 1, ptr %q, ~%cond
-  //     br i1 %cond, label %TrueBB, label %FalseBB
-  //   FalseBB:
-  //     ...
-  //   TrueBB:
-  //     ...
-  // \endcode
-  //
-  // where cload/cstore are represented by llvm.masked.load/store intrinsics,
-  // e.g.
-  //
-  // \code
-  //   %vcond = bitcast i1 %cond to <1 x i1>
-  //   %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
-  //     (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
-  //   %maskedloadstore = bitcast <1 x i32> %v0 to i32
-  //   call void @llvm.masked.store.v1i32.p0
-  //     (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
-  //   %cond.not = xor i1 %cond, true
-  //   %vcond.not = bitcast i1 %cond.not to <1 x i1>
-  //   call void @llvm.masked.store.v1i32.p0
-  //     (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
-  // \endcode
-  //
-  // So we need to turn hoisted load/store into cload/cstore.
-  auto &Context = BI->getParent()->getContext();
-  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
-  auto *Cond = BI->getOperand(0);
-  Value *Mask = nullptr;
-  // Construct the condition if needed.
-  if (!SpeculatedConditionalLoadsStores.empty()) {
-    IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
-    Mask = Builder.CreateBitCast(
-        Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
-        VCondTy);
-  }
-  for (auto *I : SpeculatedConditionalLoadsStores) {
-    IRBuilder<> Builder(I);
-    // We currently assume conditional faulting load/store is supported for
-    // scalar types only when creating new instructions. This can be easily
-    // extended for vector types in the future.
-    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
-    auto *Op0 = I->getOperand(0);
-    CallInst *MaskedLoadStore = nullptr;
-    if (auto *LI = dyn_cast<LoadInst>(I)) {
-      // Handle Load.
-      auto *Ty = I->getType();
-      PHINode *PN = nullptr;
-      Value *PassThru = nullptr;
-      for (User *U : I->users())
-        if ((PN = dyn_cast<PHINode>(U))) {
-          PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB),
-                                           FixedVectorType::get(Ty, 1));
-          break;
-        }
-      MaskedLoadStore = Builder.CreateMaskedLoad(
-          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
-      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
-      if (PN)
-        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
-      I->replaceAllUsesWith(NewLoadStore);
-    } else {
-      // Handle Store.
-      auto *StoredVal =
-          Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
-      MaskedLoadStore = Builder.CreateMaskedStore(
-          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
-    }
-    // For non-debug metadata, only !annotation, !range, !nonnull and !align are
-    // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
-    //
-    // !nonnull, !align : Not support pointer type, no need to keep.
-    // !range: Load type is changed from scalar to vector, but the metadata on
-    //         vector specifies a per-element range, so the semantics stay the
-    //         same. Keep it.
-    // !annotation: Not impact semantics. Keep it.
-    if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
-      MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
-    I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
-    // FIXME: DIAssignID is not supported for masked store yet.
-    // (Verifier::visitDIAssignIDMetadata)
-    at::deleteAssignmentMarkers(I);
-    I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
-      return Node->getMetadataID() == Metadata::DIAssignIDKind;
-    });
-    MaskedLoadStore->copyMetadata(*I);
-    I->eraseFromParent();
-  }
+  if (!SpeculatedConditionalLoadsStores.empty())
+    hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert);

   // Insert selects and rewrite the PHI operands.
   IRBuilder<NoFolder> Builder(BI);
@@ -7449,7 +7454,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
     return requestResimplify();

   if (HoistCommon &&
-      hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
+      hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
     return requestResimplify();

   return false;
@@ -7807,8 +7812,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   // can hoist it up to the branching block.
   if (BI->getSuccessor(0)->getSinglePredecessor()) {
     if (BI->getSuccessor(1)->getSinglePredecessor()) {
-      if (HoistCommon && hoistCommonCodeFromSuccessors(
-                             BI->getParent(), !Options.HoistCommonInsts))
+      if (HoistCommon &&
+          hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
         return requestResimplify();
     } else {
       // If Successor #1 has multiple preds, we may be able to conditionally
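
For reference, the shape of IR that the new isSafeCheapLoadStore/hoistConditionalLoadsStores path looks for can be written as a small reproducer. This sketch is illustrative only and not part of the commit: the function and value names are invented, and the rewrite only applies when the target reports hasConditionalLoadStoreForType for the accessed type and the speculated block contains just simple (non-volatile, non-atomic) loads/stores.

define void @example(i32 %x, i32 %y, ptr %b, ptr %p, ptr %q) {
bb:
  %cond = icmp ult i32 %x, %y
  br i1 %cond, label %TrueBB, label %FalseBB
FalseBB:
  store i32 1, ptr %q, align 4          ; candidate for a masked store under ~%cond
  br label %merge
TrueBB:
  %v = load i32, ptr %b, align 4        ; candidate for a masked load under %cond
  store i32 %v, ptr %p, align 4         ; candidate for a masked store under %cond
  br label %merge
merge:
  ret void
}

When the transform fires, the speculated accesses are rewritten to @llvm.masked.load.v1i32.p0/@llvm.masked.store.v1i32.p0 calls whose <1 x i1> mask is bitcast from %cond (or its negation), as the doc comment added in this commit describes.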