@@ -77,7 +77,6 @@ STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
77
77
STATISTIC (NumRetsDup, " Number of return instructions duplicated" );
78
78
STATISTIC (NumDbgValueMoved, " Number of debug value instructions moved" );
79
79
STATISTIC (NumSelectsExpanded, " Number of selects turned into branches" );
80
- STATISTIC (NumAndCmpsMoved, " Number of and/cmp's pushed into branches" );
81
80
STATISTIC (NumStoreExtractExposed, " Number of store(extractelement) exposed" );
82
81
83
82
static cl::opt<bool > DisableBranchOpts (
@@ -217,7 +216,6 @@ class TypePromotionTransaction;
217
216
bool optimizeExtractElementInst (Instruction *Inst);
218
217
bool dupRetToEnableTailCallOpts (BasicBlock *BB);
219
218
bool placeDbgValues (Function &F);
220
- bool sinkAndCmp (Function &F);
221
219
bool extLdPromotion (TypePromotionTransaction &TPT, LoadInst *&LI,
222
220
Instruction *&Inst,
223
221
const SmallVectorImpl<Instruction *> &Exts,
@@ -295,14 +293,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
295
293
// find a node corresponding to the value.
296
294
EverMadeChange |= placeDbgValues (F);
297
295
298
- // If there is a mask, compare against zero, and branch that can be combined
299
- // into a single target instruction, push the mask and compare into branch
300
- // users. Do this before OptimizeBlock -> OptimizeInst ->
301
- // OptimizeCmpExpression, which perturbs the pattern being searched for.
302
- if (!DisableBranchOpts) {
303
- EverMadeChange |= sinkAndCmp (F);
296
+ if (!DisableBranchOpts)
304
297
EverMadeChange |= splitBranchCondition (F);
305
- }
306
298
307
299
bool MadeChange = true ;
308
300
while (MadeChange) {
@@ -1095,6 +1087,83 @@ static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
1095
1087
return false ;
1096
1088
}
1097
1089
1090
+ /// Duplicate and sink the given 'and' instruction into user blocks where it is
1091
+ /// used in a compare to allow isel to generate better code for targets where
1092
+ /// this operation can be combined.
1093
+ /// \p AndI is the 'and' to sink, \p TLI supplies the isMaskAndCmp0FoldingBeneficial() target query, and \p InsertedInsts is only consulted by the sanity assert.
1094
+ /// Return true if any changes are made; in that case every use has been given its own copy of the 'and' and \p AndI itself has been erased.
1095
+ static bool sinkAndCmp0Expression (Instruction *AndI,
1096
+ const TargetLowering &TLI,
1097
+ SetOfInstrs &InsertedInsts) {
1098
+ // Double-check that we're not trying to optimize an instruction that was
1099
+ // already optimized by some other part of this pass.
1100
+ assert (!InsertedInsts.count (AndI) &&
1101
+ " Attempting to optimize already optimized and instruction" );
1102
+ (void ) InsertedInsts; // Only the assert reads InsertedInsts; this keeps it referenced when asserts compile out.
1103
+
1104
+ // Nothing to do for single use in same basic block.
1105
+ if (AndI->hasOneUse () &&
1106
+ AndI->getParent () == cast<Instruction>(*AndI->user_begin ())->getParent ())
1107
+ return false ;
1108
+
1109
+ // Try to avoid cases where sinking/duplicating is likely to increase register
1110
+ // pressure: bail when neither operand is a constant and both operands have a single use.
1111
+ if (!isa<ConstantInt>(AndI->getOperand (0 )) &&
1112
+ !isa<ConstantInt>(AndI->getOperand (1 )) &&
1113
+ AndI->getOperand (0 )->hasOneUse () && AndI->getOperand (1 )->hasOneUse ())
1114
+ return false ;
1115
+
1116
+ for (auto *U : AndI->users ()) { // Read-only pre-scan: every user must qualify before anything is mutated.
1117
+ Instruction *User = cast<Instruction>(U);
1118
+
1119
+ // Only sink for and mask feeding icmp with 0.
1120
+ if (!isa<ICmpInst>(User))
1121
+ return false ;
1122
+
1123
+ auto *CmpC = dyn_cast<ConstantInt>(User->getOperand (1 )); // Only operand 1 of the compare is inspected for the zero constant.
1124
+ if (!CmpC || !CmpC->isZero ())
1125
+ return false ;
1126
+ } // NOTE(review): with zero users this loop is vacuous, so an unused 'and' can reach the erase below - confirm that is intended.
1127
+
1128
+ if (!TLI.isMaskAndCmp0FoldingBeneficial (*AndI)) // The target hook has the final say on whether to transform.
1129
+ return false ;
1130
+
1131
+ DEBUG (dbgs () << " found 'and' feeding only icmp 0;\n " );
1132
+ DEBUG (AndI->getParent ()->dump ());
1133
+
1134
+ // Push the 'and' into the same block as the icmp 0. There should only be
1135
+ // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
1136
+ // others, so we don't need to keep track of which BBs we insert into.
1137
+ for (Value::user_iterator UI = AndI->user_begin (), E = AndI->user_end ();
1138
+ UI != E; ) {
1139
+ Use &TheUse = UI.getUse ();
1140
+ Instruction *User = cast<Instruction>(*UI);
1141
+
1142
+ // Preincrement use iterator so we don't invalidate it when TheUse is reassigned below.
1143
+ ++UI;
1144
+
1145
+ DEBUG (dbgs () << " sinking 'and' use: " << *User << " \n " );
1146
+
1147
+ // Keep the 'and' in the same place if the use is already in the same block.
1148
+ Instruction *InsertPt =
1149
+ User->getParent () == AndI->getParent () ? AndI : User; // Same block: anchor at the original 'and'; otherwise anchor at the user.
1150
+ Instruction *InsertedAnd =
1151
+ BinaryOperator::Create (Instruction::And, AndI->getOperand (0 ),
1152
+ AndI->getOperand (1 ), " " , InsertPt); // Fresh copy with the same two operands, positioned relative to InsertPt (presumably inserted before it - confirm against the Create overload).
1153
+ // Propagate the debug info.
1154
+ InsertedAnd->setDebugLoc (AndI->getDebugLoc ());
1155
+
1156
+ // Replace a use of the 'and' with a use of the new 'and'.
1157
+ TheUse = InsertedAnd; // Rewrites just this one use, so each use ends up with a private copy.
1158
+ ++NumAndUses;
1159
+ DEBUG (User->getParent ()->dump ());
1160
+ }
1161
+
1162
+ // We removed all uses, nuke the and.
1163
+ AndI->eraseFromParent ();
1164
+ return true ;
1165
+ }
1166
+
1098
1167
// / Check if the candidates could be combined with a shift instruction, which
1099
1168
// / includes:
1100
1169
// / 1. Truncate instruction
@@ -4544,13 +4613,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
4544
4613
!(Load->getType ()->isIntegerTy () || Load->getType ()->isPointerTy ()))
4545
4614
return false ;
4546
4615
4547
- // Skip loads we've already transformed or have no reason to transform.
4548
- if (Load->hasOneUse ()) {
4549
- User *LoadUser = *Load->user_begin ();
4550
- if (cast<Instruction>(LoadUser)->getParent () == Load->getParent () &&
4551
- !dyn_cast<PHINode>(LoadUser))
4552
- return false ;
4553
- }
4616
+ // Skip loads we've already transformed.
4617
+ if (Load->hasOneUse () &&
4618
+ InsertedInsts.count (cast<Instruction>(*Load->user_begin ())))
4619
+ return false ;
4554
4620
4555
4621
// Look at all uses of Load, looking through phis, to determine how many bits
4556
4622
// of the loaded value are needed.
@@ -4646,6 +4712,9 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
4646
4712
IRBuilder<> Builder (Load->getNextNode ());
4647
4713
auto *NewAnd = dyn_cast<Instruction>(
4648
4714
Builder.CreateAnd (Load, ConstantInt::get (Ctx, DemandBits)));
4715
+ // Mark this instruction as "inserted by CGP", so that other
4716
+ // optimizations don't touch it.
4717
+ InsertedInsts.insert (NewAnd);
4649
4718
4650
4719
// Replace all uses of load with new and (except for the use of load in the
4651
4720
// new and itself).
@@ -5560,6 +5629,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
5560
5629
5561
5630
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
5562
5631
5632
+ if (BinOp && (BinOp->getOpcode () == Instruction::And) &&
5633
+ EnableAndCmpSinking && TLI)
5634
+ return sinkAndCmp0Expression (BinOp, *TLI, InsertedInsts);
5635
+
5563
5636
if (BinOp && (BinOp->getOpcode () == Instruction::AShr ||
5564
5637
BinOp->getOpcode () == Instruction::LShr)) {
5565
5638
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand (1 ));
@@ -5689,68 +5762,6 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
5689
5762
return MadeChange;
5690
5763
}
5691
5764
5692
- // If there is a sequence that branches based on comparing a single bit
5693
- // against zero that can be combined into a single instruction, and the
5694
- // target supports folding these into a single instruction, sink the
5695
- // mask and compare into the branch uses. Do this before OptimizeBlock ->
5696
- // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
5697
- // searched for. Returns true if at least one and/icmp pair was duplicated.
5698
- bool CodeGenPrepare::sinkAndCmp (Function &F) {
5699
- if (!EnableAndCmpSinking) // The transformation is gated by this flag.
5700
- return false ;
5701
- if (!TLI || !TLI->isMaskAndBranchFoldingLegal ()) // Needs target lowering info and the target's opt-in.
5702
- return false ;
5703
- bool MadeChange = false ;
5704
- for (BasicBlock &BB : F) {
5705
- // Does this BB end with the following?
5706
- // %andVal = and %val, #single-bit-set
5707
- // %icmpVal = icmp %andVal, 0
5708
- // br i1 %icmpVal, label %dest1, label %dest2
5709
- BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator ());
5710
- if (!Brcc || !Brcc->isConditional ())
5711
- continue ;
5712
- ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand (0 )); // The branch condition must be an icmp defined in this block.
5713
- if (!Cmp || Cmp->getParent () != &BB)
5714
- continue ;
5715
- ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand (1 )); // Only operand 1 of the compare is checked for the zero constant.
5716
- if (!Zero || !Zero->isZero ())
5717
- continue ;
5718
- Instruction *And = dyn_cast<Instruction>(Cmp->getOperand (0 )); // The compared value must be an 'and' defined in this block.
5719
- if (!And || And->getOpcode () != Instruction::And || And->getParent () != &BB)
5720
- continue ;
5721
- ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand (1 )); // The mask is taken from operand 1 and must be a power-of-two constant.
5722
- if (!Mask || !Mask->getUniqueInteger ().isPowerOf2 ())
5723
- continue ;
5724
- DEBUG (dbgs () << " found and; icmp ?,0; brcc\n " ); DEBUG (BB.dump ());
5725
-
5726
- // Push the "and; icmp" for any users that are conditional branches.
5727
- // Since there can only be one branch use per BB, we don't need to keep
5728
- // track of which BBs we insert into.
5729
- for (Use &TheUse : Cmp->uses ()) {
5730
- // Find brcc use. NOTE(review): dyn_cast on a Use presumably resolves to the used value (Cmp) rather than the user, in which case this never matches - verify; TheUse.getUser() would name the user.
5731
- BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse);
5732
- if (!BrccUser || !BrccUser->isConditional ())
5733
- continue ;
5734
- BasicBlock *UserBB = BrccUser->getParent ();
5735
- if (UserBB == &BB) continue ; // Branches in BB itself already sit in the same block as the and/icmp.
5736
- DEBUG (dbgs () << " found Brcc use\n " );
5737
-
5738
- // Sink the "and; icmp" to use: build fresh copies positioned at the branch user.
5739
- MadeChange = true ;
5740
- BinaryOperator *NewAnd =
5741
- BinaryOperator::CreateAnd (And->getOperand (0 ), And->getOperand (1 ), " " ,
5742
- BrccUser);
5743
- CmpInst *NewCmp =
5744
- CmpInst::Create (Cmp->getOpcode (), Cmp->getPredicate (), NewAnd, Zero,
5745
- " " , BrccUser);
5746
- TheUse = NewCmp; // Redirect only this use; nothing in this function erases the original 'and'/'icmp'.
5747
- ++NumAndCmpsMoved;
5748
- DEBUG (BrccUser->getParent ()->dump ());
5749
- }
5750
- }
5751
- return MadeChange;
5752
- }
5753
-
5754
5765
// / \brief Scale down both weights to fit into uint32_t.
5755
5766
static void scaleWeights (uint64_t &NewTrue, uint64_t &NewFalse) {
5756
5767
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
0 commit comments