@@ -220,6 +220,8 @@ void CloneAddressArithmetic::computeFlow(llvm::Instruction* I) {
220
220
BFSQ.push (I);
221
221
unsigned int NumOfUses = Uses[I];
222
222
223
+ std::unordered_set<llvm::Instruction *> Explored;
224
+
223
225
while (!BFSQ.empty ()) {
224
226
225
227
llvm::Instruction *CurrI = BFSQ.front ();
@@ -233,11 +235,14 @@ void CloneAddressArithmetic::computeFlow(llvm::Instruction* I) {
233
235
bool NotConstant = !llvm::isa<llvm::Constant>(Op);
234
236
bool NotUniform = IGC_IS_FLAG_ENABLED (RematRespectUniformity) ? !WI->isUniform (Op) : true ;
235
237
bool AddressArithmetic = isAddressArithmetic (Op);
238
+ bool NotExplored = !Explored.count (Op);
236
239
237
- if (NotConstant && NotPHI && AddressArithmetic && NotUniform) {
238
- FlowMap[Op] = FlowMap[Op] + NumOfUses;
239
- BFSQ.push (Op);
240
- }
240
+ bool Skip = !(NotConstant && NotPHI && AddressArithmetic && NotUniform && NotExplored);
241
+ if (Skip) continue ;
242
+
243
+ FlowMap[Op] = FlowMap[Op] + NumOfUses;
244
+ Explored.insert (Op);
245
+ BFSQ.push (Op);
241
246
}
242
247
}
243
248
}
@@ -254,6 +259,7 @@ CloneAddressArithmetic::collectRematChain(llvm::Instruction* I, unsigned int Num
254
259
PRINT_LOG (" Collect chain for: " ); PRINT_INST (I); PRINT_LOG_NL (" " );
255
260
256
261
llvm::SmallVector<unsigned int , 4 > StateVector;
262
+ std::unordered_set<llvm::Instruction *> Explored;
257
263
258
264
// we are traversing ssa-chain for address arithmetic
259
265
while (!BFSQ.empty ()) {
@@ -264,8 +270,7 @@ CloneAddressArithmetic::collectRematChain(llvm::Instruction* I, unsigned int Num
264
270
for (unsigned int i = 0 ; i < CurrI->getNumOperands (); ++i) {
265
271
266
272
Instruction *Op = llvm::dyn_cast<Instruction>(CurrI->getOperand (i));
267
- if ( !Op)
268
- continue ;
273
+ if (!Op) continue ;
269
274
270
275
PRINT_LOG (" Candidate: [" << FlowMap[Op] << " ] " ); PRINT_INST (Op);
271
276
@@ -274,16 +279,21 @@ CloneAddressArithmetic::collectRematChain(llvm::Instruction* I, unsigned int Num
274
279
bool SameBB = IGC_IS_FLAG_ENABLED (RematSameBBScope) ? Op->getParent () == I->getParent () : true ;
275
280
bool NotUniform = IGC_IS_FLAG_ENABLED (RematRespectUniformity) ? !WI->isUniform (Op) : true ;
276
281
bool AddressArithmetic = isAddressArithmetic (Op);
277
-
278
282
bool NotTooManyUses = FlowMap[Op] <= NumOfUsesLimit;
283
+ bool NotExplored = !Explored.count (Op);
279
284
280
- if (SameBB && NotConstant && NotPHI && NotTooManyUses && AddressArithmetic && NotUniform) {
281
- BFSQ. push (Op );
282
- RematVector. push_back (Op);
283
- PRINT_LOG_NL (" \t\t --> Accepted " );
285
+ PRINT_LOG ( " \t\t " << " BB: " << SameBB << " Uses: " << NotTooManyUses << " Ar: " << AddressArithmetic << " Un: " << NotUniform);
286
+ bool Skip = !(SameBB && NotConstant && NotPHI && NotTooManyUses && AddressArithmetic && NotUniform && NotExplored );
287
+ if (Skip) {
288
+ PRINT_LOG_NL (" \t\t --> Rejected " );
284
289
continue ;
285
290
}
286
- PRINT_LOG_NL (" \t\t --> Rejected: " << " BB:" << SameBB << " Uses:" << NotTooManyUses << " Ar:" << AddressArithmetic << " Un:" << NotUniform);
291
+
292
+ BFSQ.push (Op);
293
+ Explored.insert (Op);
294
+ RematVector.push_back (Op);
295
+
296
+ PRINT_LOG_NL (" \t\t --> Accepted" );
287
297
}
288
298
}
289
299
@@ -540,8 +550,10 @@ unsigned int CloneAddressArithmetic::collectFlow(RematSet& ToProcess, Function&
540
550
float Coefficient = 0 .01f *(float )Base;
541
551
unsigned int Result = (unsigned int )((float )FlowBudget*Coefficient);
542
552
543
- for (auto el : ToProcess)
553
+ for (auto el : ToProcess) {
554
+ PRINT_LOG (" Start to compute flow: " ); PRINT_INST_NL (el);
544
555
computeFlow ((Instruction*)el);
556
+ }
545
557
546
558
if (DEBUG) {
547
559
for (const auto &el : FlowMap) {
@@ -555,25 +567,26 @@ unsigned int CloneAddressArithmetic::collectFlow(RematSet& ToProcess, Function&
555
567
556
568
// Runs the greedy rematerialization sequence on function F.
//
// Returns false without doing any work when isRegPressureLow(F) reports
// true. Otherwise it initializes the log, counts uses, collects the
// instructions to process, computes the flow threshold, speculates whole
// chains, rematerializes, and returns true. writeLog() is called after
// each of the last three stages.
bool CloneAddressArithmetic::greedyRemat(Function &F) {

    // Early exit: the pass is skipped when register pressure is reported low.
    if (isRegPressureLow(F)) {
        return false;
    }

    initializeLogFile(F);
    countUses(F);

    // Gather the candidate set, then derive the threshold from it.
    RematSet ToProcess;
    collectInstToProcess(ToProcess, F);

    unsigned int FlowThreshold = collectFlow(ToProcess, F);
    writeLog();

    speculateWholeChain(ToProcess, FlowThreshold);
    writeLog();

    rematerialize(ToProcess, FlowThreshold);
    writeLog();

    // Reset the accumulated flow counts once the sequence has finished.
    FlowMap.clear();
    return true;
}
578
591
579
592
0 commit comments