@@ -77,7 +77,6 @@ STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
77
77
STATISTIC (NumRetsDup, " Number of return instructions duplicated" );
78
78
STATISTIC (NumDbgValueMoved, " Number of debug value instructions moved" );
79
79
STATISTIC (NumSelectsExpanded, " Number of selects turned into branches" );
80
- STATISTIC (NumAndCmpsMoved, " Number of and/cmp's pushed into branches" );
81
80
STATISTIC (NumStoreExtractExposed, " Number of store(extractelement) exposed" );
82
81
83
82
static cl::opt<bool > DisableBranchOpts (
@@ -215,7 +214,6 @@ class TypePromotionTransaction;
215
214
bool optimizeExtractElementInst (Instruction *Inst);
216
215
bool dupRetToEnableTailCallOpts (BasicBlock *BB);
217
216
bool placeDbgValues (Function &F);
218
- bool sinkAndCmp (Function &F);
219
217
bool extLdPromotion (TypePromotionTransaction &TPT, LoadInst *&LI,
220
218
Instruction *&Inst,
221
219
const SmallVectorImpl<Instruction *> &Exts,
@@ -290,14 +288,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
290
288
// find a node corresponding to the value.
291
289
EverMadeChange |= placeDbgValues (F);
292
290
293
- // If there is a mask, compare against zero, and branch that can be combined
294
- // into a single target instruction, push the mask and compare into branch
295
- // users. Do this before OptimizeBlock -> OptimizeInst ->
296
- // OptimizeCmpExpression, which perturbs the pattern being searched for.
297
- if (!DisableBranchOpts) {
298
- EverMadeChange |= sinkAndCmp (F);
291
+ if (!DisableBranchOpts)
299
292
EverMadeChange |= splitBranchCondition (F);
300
- }
301
293
302
294
bool MadeChange = true ;
303
295
while (MadeChange) {
@@ -1090,6 +1082,83 @@ static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
1090
1082
return false ;
1091
1083
}
1092
1084
1085
+ /// Duplicate and sink the given 'and' instruction into user blocks where it is
1086
+ /// used in a compare to allow isel to generate better code for targets where
1087
+ /// this operation can be combined.
1088
+ /// The transform only fires when every user of \p AndI is an icmp whose second operand is the constant zero and \p TLI.isMaskAndCmp0FoldingBeneficial() accepts \p AndI; each such use then gets its own copy of the 'and' and \p AndI itself is erased.
1089
+ /// Return true if any changes are made (in which case \p AndI has been erased and must not be used by the caller).
1090
+ static bool sinkAndCmp0Expression (Instruction *AndI,
1091
+ const TargetLowering &TLI,
1092
+ SetOfInstrs &InsertedInsts) {
1093
+ // Double-check that we're not trying to optimize an instruction that was
1094
+ // already optimized by some other part of this pass. InsertedInsts is read only by this assert; the (void) cast below keeps the parameter referenced in builds where the assert compiles to nothing.
1095
+ assert (!InsertedInsts.count (AndI) &&
1096
+ " Attempting to optimize already optimized and instruction" );
1097
+ (void ) InsertedInsts;
1098
+
1099
+ // Nothing to do for single use in same basic block.
1100
+ if (AndI->hasOneUse () &&
1101
+ AndI->getParent () == cast<Instruction>(*AndI->user_begin ())->getParent ())
1102
+ return false ;
1103
+
1104
+ // Try to avoid cases where sinking/duplicating is likely to increase register
1105
+ // pressure: bail out when neither operand is a constant and each operand has exactly one use.
1106
+ if (!isa<ConstantInt>(AndI->getOperand (0 )) &&
1107
+ !isa<ConstantInt>(AndI->getOperand (1 )) &&
1108
+ AndI->getOperand (0 )->hasOneUse () && AndI->getOperand (1 )->hasOneUse ())
1109
+ return false ;
1110
+
1111
+ for (auto *U : AndI->users ()) {
1112
+ Instruction *User = cast<Instruction>(U);
1113
+
1114
+ // Only sink for and mask feeding icmp with 0; any other kind of user aborts the whole transform.
1115
+ if (!isa<ICmpInst>(User))
1116
+ return false ;
1117
+
1118
+ auto *CmpC = dyn_cast<ConstantInt>(User->getOperand (1 ));
1119
+ if (!CmpC || !CmpC->isZero ())
1120
+ return false ;
1121
+ }
1122
+
1123
+ if (!TLI.isMaskAndCmp0FoldingBeneficial (*AndI))
1124
+ return false ;
1125
+
1126
+ DEBUG (dbgs () << " found 'and' feeding only icmp 0;\n " );
1127
+ DEBUG (AndI->getParent ()->dump ());
1128
+
1129
+ // Push the 'and' into the same block as the icmp 0. There should only be
1130
+ // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
1131
+ // others, so we don't need to keep track of which BBs we insert into.
1132
+ for (Value::user_iterator UI = AndI->user_begin (), E = AndI->user_end ();
1133
+ UI != E; ) {
1134
+ Use &TheUse = UI.getUse ();
1135
+ Instruction *User = cast<Instruction>(*UI);
1136
+
1137
+ // Advance the use iterator before TheUse is rewritten below, so we don't invalidate it.
1138
+ ++UI;
1139
+
1140
+ DEBUG (dbgs () << " sinking 'and' use: " << *User << " \n " );
1141
+
1142
+ // Keep the 'and' in the same place if the use is already in the same block; otherwise the insertion point is the user itself.
1143
+ Instruction *InsertPt =
1144
+ User->getParent () == AndI->getParent () ? AndI : User;
1145
+ Instruction *InsertedAnd =
1146
+ BinaryOperator::Create (Instruction::And, AndI->getOperand (0 ),
1147
+ AndI->getOperand (1 ), " " , InsertPt);
1148
+ // Propagate the debug info.
1149
+ InsertedAnd->setDebugLoc (AndI->getDebugLoc ());
1150
+
1151
+ // Replace a use of the 'and' with a use of the new 'and'.
1152
+ TheUse = InsertedAnd;
1153
+ ++NumAndUses;
1154
+ DEBUG (User->getParent ()->dump ());
1155
+ }
1156
+
1157
+ // We removed all uses, nuke the and.
1158
+ AndI->eraseFromParent ();
1159
+ return true ;
1160
+ }
1161
+
1093
1162
// / Check if the candidates could be combined with a shift instruction, which
1094
1163
// / includes:
1095
1164
// / 1. Truncate instruction
@@ -4534,13 +4603,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
4534
4603
!(Load->getType ()->isIntegerTy () || Load->getType ()->isPointerTy ()))
4535
4604
return false ;
4536
4605
4537
- // Skip loads we've already transformed or have no reason to transform.
4538
- if (Load->hasOneUse ()) {
4539
- User *LoadUser = *Load->user_begin ();
4540
- if (cast<Instruction>(LoadUser)->getParent () == Load->getParent () &&
4541
- !dyn_cast<PHINode>(LoadUser))
4542
- return false ;
4543
- }
4606
+ // Skip loads we've already transformed.
4607
+ if (Load->hasOneUse () &&
4608
+ InsertedInsts.count (cast<Instruction>(*Load->user_begin ())))
4609
+ return false ;
4544
4610
4545
4611
// Look at all uses of Load, looking through phis, to determine how many bits
4546
4612
// of the loaded value are needed.
@@ -4636,6 +4702,9 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
4636
4702
IRBuilder<> Builder (Load->getNextNode ());
4637
4703
auto *NewAnd = dyn_cast<Instruction>(
4638
4704
Builder.CreateAnd (Load, ConstantInt::get (Ctx, DemandBits)));
4705
+ // Mark this instruction as "inserted by CGP", so that other
4706
+ // optimizations don't touch it.
4707
+ InsertedInsts.insert (NewAnd);
4639
4708
4640
4709
// Replace all uses of load with new and (except for the use of load in the
4641
4710
// new and itself).
@@ -5550,6 +5619,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
5550
5619
5551
5620
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
5552
5621
5622
+ if (BinOp && (BinOp->getOpcode () == Instruction::And) &&
5623
+ EnableAndCmpSinking && TLI)
5624
+ return sinkAndCmp0Expression (BinOp, *TLI, InsertedInsts);
5625
+
5553
5626
if (BinOp && (BinOp->getOpcode () == Instruction::AShr ||
5554
5627
BinOp->getOpcode () == Instruction::LShr)) {
5555
5628
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand (1 ));
@@ -5679,68 +5752,6 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
5679
5752
return MadeChange;
5680
5753
}
5681
5754
5682
- // If there is a sequence that branches based on comparing a single bit
5683
- // against zero that can be combined into a single instruction, and the
5684
- // target supports folding these into a single instruction, sink the
5685
- // mask and compare into the branch uses. Do this before OptimizeBlock ->
5686
- // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
5687
- // searched for.
5688
- bool CodeGenPrepare::sinkAndCmp (Function &F) {
5689
- if (!EnableAndCmpSinking)
5690
- return false ;
5691
- if (!TLI || !TLI->isMaskAndBranchFoldingLegal ())
5692
- return false ;
5693
- bool MadeChange = false ;
5694
- for (BasicBlock &BB : F) {
5695
- // Does this BB end with the following?
5696
- // %andVal = and %val, #single-bit-set
5697
- // %icmpVal = icmp %andResult, 0
5698
- // br i1 %cmpVal label %dest1, label %dest2"
5699
- BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator ());
5700
- if (!Brcc || !Brcc->isConditional ())
5701
- continue ;
5702
- ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand (0 ));
5703
- if (!Cmp || Cmp->getParent () != &BB)
5704
- continue ;
5705
- ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand (1 ));
5706
- if (!Zero || !Zero->isZero ())
5707
- continue ;
5708
- Instruction *And = dyn_cast<Instruction>(Cmp->getOperand (0 ));
5709
- if (!And || And->getOpcode () != Instruction::And || And->getParent () != &BB)
5710
- continue ;
5711
- ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand (1 ));
5712
- if (!Mask || !Mask->getUniqueInteger ().isPowerOf2 ())
5713
- continue ;
5714
- DEBUG (dbgs () << " found and; icmp ?,0; brcc\n " ); DEBUG (BB.dump ());
5715
-
5716
- // Push the "and; icmp" for any users that are conditional branches.
5717
- // Since there can only be one branch use per BB, we don't need to keep
5718
- // track of which BBs we insert into.
5719
- for (Use &TheUse : Cmp->uses ()) {
5720
- // Find brcc use.
5721
- BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse);
5722
- if (!BrccUser || !BrccUser->isConditional ())
5723
- continue ;
5724
- BasicBlock *UserBB = BrccUser->getParent ();
5725
- if (UserBB == &BB) continue ;
5726
- DEBUG (dbgs () << " found Brcc use\n " );
5727
-
5728
- // Sink the "and; icmp" to use.
5729
- MadeChange = true ;
5730
- BinaryOperator *NewAnd =
5731
- BinaryOperator::CreateAnd (And->getOperand (0 ), And->getOperand (1 ), " " ,
5732
- BrccUser);
5733
- CmpInst *NewCmp =
5734
- CmpInst::Create (Cmp->getOpcode (), Cmp->getPredicate (), NewAnd, Zero,
5735
- " " , BrccUser);
5736
- TheUse = NewCmp;
5737
- ++NumAndCmpsMoved;
5738
- DEBUG (BrccUser->getParent ()->dump ());
5739
- }
5740
- }
5741
- return MadeChange;
5742
- }
5743
-
5744
5755
// / \brief Scale down both weights to fit into uint32_t.
5745
5756
static void scaleWeights (uint64_t &NewTrue, uint64_t &NewFalse) {
5746
5757
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
0 commit comments