@@ -298,6 +298,47 @@ G4_SrcRegRegion* HWConformity::insertCopyBefore(INST_LIST_ITER it, uint32_t srcN
298
298
return newSrc;
299
299
}
300
300
301
+ G4_SrcRegRegion* HWConformity::insertCopyAtBBEntry (G4_BB *bb, uint8_t execSize, G4_Operand *src)
302
+ {
303
+ MUST_BE_TRUE (src != nullptr && src->isSrcRegRegion (), " source must be a SrcRegRegion" );
304
+ G4_SrcRegRegion* origSrc = src->asSrcRegRegion ();
305
+ auto lb = src->getLinearizedStart ();
306
+ auto rb = src->getLinearizedEnd ();
307
+
308
+ unsigned int regNum = lb / G4_GRF_REG_NBYTES;
309
+ unsigned int numRegs = (rb + G4_GRF_REG_NBYTES - 1 - lb) / G4_GRF_REG_NBYTES;
310
+ if (regNum == -1 || numRegs == 0 )
311
+ {
312
+ return nullptr ;
313
+ }
314
+
315
+ G4_Declare* dcl = builder.createTempVar (execSize, origSrc->getType (), GRFALIGN);
316
+ dcl->getRegVar ()->setPhyReg (builder.phyregpool .getGreg (regNum), 0 );
317
+ G4_SrcModifier modifier = origSrc->getModifier ();
318
+ origSrc->setModifier (Mod_src_undef);
319
+ G4_DstRegRegion* dst = builder.Create_Dst_Opnd_From_Dcl (dcl, 1 );
320
+ dst->computePReg ();
321
+
322
+ G4_INST* movInst = builder.createMov (execSize, dst, origSrc, InstOpt_WriteEnable, false );
323
+
324
+ for (auto it = bb->begin ();
325
+ it != bb->end ();
326
+ it++)
327
+ {
328
+ if (!(*it)->isLabel ())
329
+ {
330
+ bb->insert (it, movInst);
331
+ break ;
332
+ }
333
+ }
334
+
335
+ G4_SrcRegRegion* newSrc = builder.createSrcRegRegion (modifier, Direct, dcl->getRegVar (),
336
+ 0 , 0 , execSize == 1 ? builder.getRegionScalar () : builder.getRegionStride1 (),
337
+ dcl->getElemType ());
338
+ newSrc->asSrcRegRegion ()->computePReg ();
339
+ return newSrc;
340
+ }
341
+
301
342
/*
302
343
* create a new mov instruction
303
344
* mov (esize) tmp<1>:type src
@@ -4222,6 +4263,7 @@ struct AccInterval
4222
4263
int assignedAcc = -1 ;
4223
4264
int bundleConflictTimes = 0 ;
4224
4265
int bankConflictTimes = 0 ;
4266
+ int suppressionTimes = 0 ;
4225
4267
4226
4268
AccInterval (G4_INST* inst_, int lastUse_, bool preAssigned = false ) :
4227
4269
inst (inst_), lastUse(lastUse_), isPreAssigned(preAssigned)
@@ -4543,6 +4585,31 @@ static bool replaceDstWithAcc(G4_INST* inst, int accNum, IR_Builder& builder)
4543
4585
return true ;
4544
4586
}
4545
4587
4588
+ static bool isAccCandidate (G4_INST* inst, Gen4_Operand_Number opndNum, G4_Kernel& kernel)
4589
+
4590
+ {
4591
+ if (!kernel.fg .builder ->canMadHaveSrc0Acc ())
4592
+ {
4593
+ return false ;
4594
+ }
4595
+
4596
+ switch (opndNum)
4597
+ {
4598
+ case Opnd_src0:
4599
+ case Opnd_src1:
4600
+ break ;
4601
+ default :
4602
+ return false ;
4603
+ }
4604
+
4605
+ if (!inst->canSrcBeAcc (opndNum))
4606
+ {
4607
+ return false ;
4608
+ }
4609
+
4610
+ return true ;
4611
+ }
4612
+
4546
4613
struct AccAssignment
4547
4614
{
4548
4615
std::vector<bool > freeAccs;
@@ -4677,13 +4744,15 @@ void HWConformity::multiAccSubstitution(G4_BB* bb)
4677
4744
bool mustBeAcc0 = false ;
4678
4745
int bundleBCTimes = 0 ;
4679
4746
int bankBCTimes = 0 ;
4747
+ int readSuppressionSrcs = 0 ;
4680
4748
if (isAccCandidate (inst, kernel, lastUseId, mustBeAcc0))
4681
4749
{
4682
4750
// this is a potential candidate for acc substitution
4683
4751
AccInterval *newInterval = new AccInterval (inst, lastUseId);
4684
4752
newInterval->mustBeAcc0 = mustBeAcc0;
4685
4753
newInterval->bankConflictTimes = bankBCTimes;
4686
4754
newInterval->bundleConflictTimes = bundleBCTimes;
4755
+ newInterval->suppressionTimes = readSuppressionSrcs;
4687
4756
4688
4757
intervals.push_back (newInterval);
4689
4758
}
@@ -4788,7 +4857,126 @@ void HWConformity::multiAccSubstitution(G4_BB* bb)
4788
4857
{
4789
4858
delete intervals[i];
4790
4859
}
4860
+
4861
+ return ;
4862
+ }
4863
+
4864
+ struct LiveNode
4865
+ {
4866
+ G4_INST* Inst;
4867
+ Gen4_Operand_Number OpNum;
4868
+ LiveNode (G4_INST* Inst, Gen4_Operand_Number OpNum)
4869
+ : Inst(Inst)
4870
+ , OpNum(OpNum)
4871
+ {
4872
+ }
4873
+ };
4874
+
4875
+ #define GLOBAL_USE_NUM 15 // localizeForAcc rewrites a declare only when it has at least this many recorded uses in the block
4876
+
4877
+ static bool isSameOperand (G4_Operand *srcOpnd, struct LiveNode *ln)
4878
+ {
4879
+ G4_Operand *opnd = ln->Inst ->getOperand (ln->OpNum );
4880
+
4881
+ if (opnd->compareOperand (srcOpnd) == Rel_eq)
4882
+ {
4883
+ return true ;
4884
+ }
4885
+
4886
+ return false ;
4791
4887
}
4888
+
4889
+ // substitute local operands with acc when possible
4890
+ void HWConformity::localizeForAcc (G4_BB* bb)
4891
+ {
4892
+ std::map<const G4_Declare*, G4_Operand*> replacedOperand;
4893
+ std::unordered_map<const G4_Declare*, vector<struct LiveNode >> useNodes;
4894
+ std::vector<const G4_Declare*> erasedCandidates;
4895
+
4896
+ for (auto instIter = bb->begin (), instEnd = bb->end (); instIter != instEnd; ++instIter)
4897
+ {
4898
+ G4_INST* inst = *instIter;
4899
+
4900
+ // Not defined in current BB
4901
+ G4_Operand* dst = inst->getOperand (Opnd_dst);
4902
+ if (dst && dst->isGreg () && kernel.fg .globalOpndHT .isOpndGlobal (dst))
4903
+ {
4904
+ const G4_Declare *dcl = dst->getTopDcl ();
4905
+ if (useNodes.find (dcl) != useNodes.end ())
4906
+ {
4907
+ useNodes.erase (dcl);
4908
+ erasedCandidates.emplace_back (dcl);
4909
+ }
4910
+ }
4911
+
4912
+ // Source operand
4913
+ for (auto OpNum :
4914
+ { Gen4_Operand_Number::Opnd_src0, Gen4_Operand_Number::Opnd_src1,
4915
+ Gen4_Operand_Number::Opnd_src2})
4916
+ {
4917
+ G4_Operand* src = inst->getOperand (OpNum);
4918
+ if (src && src->isGreg () && kernel.fg .globalOpndHT .isOpndGlobal (src))
4919
+ {
4920
+ const G4_Declare* dcl = src->getTopDcl ();
4921
+ if ((OpNum != Opnd_src0 && // Acc can be used only for src0 and src1
4922
+ OpNum != Opnd_src1) ||
4923
+ !isAccCandidate (inst, OpNum, kernel)) // The operand is can be replaced with ACC
4924
+ {
4925
+ auto dclIter = std::find (erasedCandidates.begin (), erasedCandidates.end (), dcl);
4926
+ if (dclIter == erasedCandidates.end ())
4927
+ {
4928
+ erasedCandidates.emplace_back (dcl);
4929
+ }
4930
+ }
4931
+ else
4932
+ {
4933
+ if (useNodes[dcl].empty () ||
4934
+ isSameOperand (src, &(useNodes[dcl][0 ])))
4935
+ {
4936
+ useNodes[dcl].emplace_back (inst, OpNum);
4937
+ }
4938
+ }
4939
+ }
4940
+ }
4941
+ }
4942
+
4943
+ for (auto & Nodes : useNodes)
4944
+ {
4945
+ const G4_Declare* dcl = Nodes.first ;
4946
+ auto dclIter = std::find (erasedCandidates.begin (), erasedCandidates.end (), dcl);
4947
+ if (dclIter != erasedCandidates.end ())
4948
+ {
4949
+ continue ;
4950
+ }
4951
+ if (Nodes.second .size () >= GLOBAL_USE_NUM)
4952
+ {
4953
+ for (auto & LN : Nodes.second )
4954
+ {
4955
+ G4_INST* inst = LN.Inst ;
4956
+ Gen4_Operand_Number opNum = LN.OpNum ;
4957
+ int i = inst->getSrcNum (opNum);
4958
+ G4_Operand* src = inst->getSrc (i);
4959
+ G4_Operand* tmpOpnd = nullptr ;
4960
+
4961
+ auto itR = replacedOperand.find (dcl);
4962
+ if (itR != replacedOperand.end ())
4963
+ {
4964
+ tmpOpnd = builder.duplicateOperand (itR->second );
4965
+ }
4966
+ else
4967
+ {
4968
+ tmpOpnd = insertCopyAtBBEntry (bb, inst->getExecSize (), src);
4969
+ replacedOperand[dcl] = tmpOpnd;
4970
+ }
4971
+ inst->setSrc (tmpOpnd, i);
4972
+ }
4973
+ }
4974
+ }
4975
+
4976
+ return ;
4977
+ }
4978
+
4979
+
4792
4980
// substitute local operands with acc when possible
4793
4981
void HWConformity::accSubstitution (G4_BB* bb)
4794
4982
{
0 commit comments