Skip to content

Commit af2c7ad

Browse files
bcheng0127 authored and sys_zuul committed
localization for acc
Change-Id: I1cf6be5ed7f3ada6ec90d542980c374ac28dfdbe
1 parent 1bddd1a commit af2c7ad

File tree

4 files changed

+204
-2
lines changed

4 files changed

+204
-2
lines changed

visa/HWConformity.cpp

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,47 @@ G4_SrcRegRegion* HWConformity::insertCopyBefore(INST_LIST_ITER it, uint32_t srcN
298298
return newSrc;
299299
}
300300

301+
// Copy a source operand into a fresh temporary at the top of a basic block.
//
// Emits
//     mov (execSize) tmp<1>:type src    (InstOpt_WriteEnable)
// ahead of the first non-label instruction of bb (or at the end of bb when it
// holds nothing but labels) and returns a new source region that reads tmp and
// carries the source modifier src had on entry.
//
// bb       - block that receives the copy.
// execSize - execution size of the copy; also the element count of tmp.
// src      - operand to copy; must be a G4_SrcRegRegion (asserted).
//
// Returns nullptr, leaving src and bb untouched, when the linearized bounds of
// src do not yield a usable register range.
//
// Side effect on success: src itself becomes the source of the new mov and its
// modifier is reset to Mod_src_undef, so the caller is expected to replace src
// in the instruction that used it with the returned operand.
G4_SrcRegRegion* HWConformity::insertCopyAtBBEntry(G4_BB *bb, uint8_t execSize, G4_Operand *src)
{
    MUST_BE_TRUE(src != nullptr && src->isSrcRegRegion(), "source must be a SrcRegRegion");
    G4_SrcRegRegion* origSrc = src->asSrcRegRegion();
    auto lb = src->getLinearizedStart();
    auto rb = src->getLinearizedEnd();

    unsigned int regNum = lb / G4_GRF_REG_NBYTES;
    unsigned int numRegs = (rb + G4_GRF_REG_NBYTES - 1 - lb) / G4_GRF_REG_NBYTES;
    // regNum is unsigned, so the "-1" sentinel is spelled out as an unsigned
    // value instead of relying on an implicit signed -> unsigned conversion.
    if (regNum == static_cast<unsigned int>(-1) || numRegs == 0)
    {
        return nullptr;
    }

    // The temporary is bound to the same GRF number the source starts in.
    // NOTE(review): this presumes src already has a physical register
    // assignment at this point - confirm against the caller's pipeline stage.
    G4_Declare* dcl = builder.createTempVar(execSize, origSrc->getType(), GRFALIGN);
    dcl->getRegVar()->setPhyReg(builder.phyregpool.getGreg(regNum), 0);

    // The raw value is copied; the modifier moves onto the returned operand.
    G4_SrcModifier modifier = origSrc->getModifier();
    origSrc->setModifier(Mod_src_undef);
    G4_DstRegRegion* dst = builder.Create_Dst_Opnd_From_Dcl(dcl, 1);
    dst->computePReg();

    G4_INST* movInst = builder.createMov(execSize, dst, origSrc, InstOpt_WriteEnable, false);

    // Skip the leading labels. If the block contains only labels (or nothing)
    // the position ends up at bb->end(), so the mov is still inserted instead
    // of being silently dropped while a reader of tmp is handed back.
    auto insertPos = bb->begin();
    while (insertPos != bb->end() && (*insertPos)->isLabel())
    {
        ++insertPos;
    }
    bb->insert(insertPos, movInst);

    G4_SrcRegRegion* newSrc = builder.createSrcRegRegion(modifier, Direct, dcl->getRegVar(),
        0, 0, execSize == 1 ? builder.getRegionScalar() : builder.getRegionStride1(),
        dcl->getElemType());
    newSrc->computePReg();
    return newSrc;
}
341+
301342
/*
302343
* create a new mov instruction
303344
* mov (esize) tmp<1>:type src
@@ -4222,6 +4263,7 @@ struct AccInterval
42224263
int assignedAcc = -1;
42234264
int bundleConflictTimes = 0;
42244265
int bankConflictTimes = 0;
4266+
int suppressionTimes = 0;
42254267

42264268
AccInterval(G4_INST* inst_, int lastUse_, bool preAssigned = false) :
42274269
inst(inst_), lastUse(lastUse_), isPreAssigned(preAssigned)
@@ -4543,6 +4585,31 @@ static bool replaceDstWithAcc(G4_INST* inst, int accNum, IR_Builder& builder)
45434585
return true;
45444586
}
45454587

4588+
// Decide whether source operand `opndNum` of `inst` may be replaced by an
// accumulator register.
//
// All three conditions must hold, and they are evaluated in this order:
//   1. the builder reports canMadHaveSrc0Acc(),
//   2. the operand is src0 or src1,
//   3. the instruction itself accepts acc in that slot (canSrcBeAcc).
static bool isAccCandidate(G4_INST* inst, Gen4_Operand_Number opndNum, G4_Kernel& kernel)
{
    if (!kernel.fg.builder->canMadHaveSrc0Acc())
    {
        return false;
    }

    const bool isSrc0OrSrc1 = (opndNum == Opnd_src0) || (opndNum == Opnd_src1);
    return isSrc0OrSrc1 && inst->canSrcBeAcc(opndNum);
}
4612+
45464613
struct AccAssignment
45474614
{
45484615
std::vector<bool> freeAccs;
@@ -4677,13 +4744,15 @@ void HWConformity::multiAccSubstitution(G4_BB* bb)
46774744
bool mustBeAcc0 = false;
46784745
int bundleBCTimes = 0;
46794746
int bankBCTimes = 0;
4747+
int readSuppressionSrcs = 0;
46804748
if (isAccCandidate(inst, kernel, lastUseId, mustBeAcc0))
46814749
{
46824750
// this is a potential candidate for acc substitution
46834751
AccInterval *newInterval = new AccInterval(inst, lastUseId);
46844752
newInterval->mustBeAcc0 = mustBeAcc0;
46854753
newInterval->bankConflictTimes = bankBCTimes;
46864754
newInterval->bundleConflictTimes = bundleBCTimes;
4755+
newInterval->suppressionTimes = readSuppressionSrcs;
46874756

46884757
intervals.push_back(newInterval);
46894758
}
@@ -4788,7 +4857,126 @@ void HWConformity::multiAccSubstitution(G4_BB* bb)
47884857
{
47894858
delete intervals[i];
47904859
}
4860+
4861+
return;
4862+
}
4863+
4864+
struct LiveNode
4865+
{
4866+
G4_INST* Inst;
4867+
Gen4_Operand_Number OpNum;
4868+
LiveNode(G4_INST* Inst, Gen4_Operand_Number OpNum)
4869+
: Inst(Inst)
4870+
, OpNum(OpNum)
4871+
{
4872+
}
4873+
};
4874+
4875+
// Minimum number of recorded uses of one global variable inside a basic block
// before HWConformity::localizeForAcc() replaces those uses with a copy.
#define GLOBAL_USE_NUM 15
4876+
4877+
// True when srcOpnd compares as Rel_eq against the operand that the recorded
// use `ln` points at (ln->Inst's operand number ln->OpNum).
static bool isSameOperand(G4_Operand *srcOpnd, struct LiveNode *ln)
{
    G4_Operand *recorded = ln->Inst->getOperand(ln->OpNum);
    return recorded->compareOperand(srcOpnd) == Rel_eq;
}
4888+
4889+
// substitute local operands with acc when possible
4890+
void HWConformity::localizeForAcc(G4_BB* bb)
4891+
{
4892+
std::map<const G4_Declare*, G4_Operand*> replacedOperand;
4893+
std::unordered_map<const G4_Declare*, vector<struct LiveNode>> useNodes;
4894+
std::vector<const G4_Declare*> erasedCandidates;
4895+
4896+
for (auto instIter = bb->begin(), instEnd = bb->end(); instIter != instEnd; ++instIter)
4897+
{
4898+
G4_INST* inst = *instIter;
4899+
4900+
//Not defined in current BB
4901+
G4_Operand* dst = inst->getOperand(Opnd_dst);
4902+
if (dst && dst->isGreg() && kernel.fg.globalOpndHT.isOpndGlobal(dst))
4903+
{
4904+
const G4_Declare *dcl = dst->getTopDcl();
4905+
if (useNodes.find(dcl) != useNodes.end())
4906+
{
4907+
useNodes.erase(dcl);
4908+
erasedCandidates.emplace_back(dcl);
4909+
}
4910+
}
4911+
4912+
//Source operand
4913+
for (auto OpNum :
4914+
{ Gen4_Operand_Number::Opnd_src0, Gen4_Operand_Number::Opnd_src1,
4915+
Gen4_Operand_Number::Opnd_src2})
4916+
{
4917+
G4_Operand* src = inst->getOperand(OpNum);
4918+
if (src && src->isGreg() && kernel.fg.globalOpndHT.isOpndGlobal(src))
4919+
{
4920+
const G4_Declare* dcl = src->getTopDcl();
4921+
if ((OpNum != Opnd_src0 && //Acc can be used only for src0 and src1
4922+
OpNum != Opnd_src1) ||
4923+
!isAccCandidate(inst, OpNum, kernel)) //The operand is can be replaced with ACC
4924+
{
4925+
auto dclIter = std::find(erasedCandidates.begin(), erasedCandidates.end(), dcl);
4926+
if (dclIter == erasedCandidates.end())
4927+
{
4928+
erasedCandidates.emplace_back(dcl);
4929+
}
4930+
}
4931+
else
4932+
{
4933+
if (useNodes[dcl].empty() ||
4934+
isSameOperand(src, &(useNodes[dcl][0])))
4935+
{
4936+
useNodes[dcl].emplace_back(inst, OpNum);
4937+
}
4938+
}
4939+
}
4940+
}
4941+
}
4942+
4943+
for (auto& Nodes : useNodes)
4944+
{
4945+
const G4_Declare* dcl = Nodes.first;
4946+
auto dclIter = std::find(erasedCandidates.begin(), erasedCandidates.end(), dcl);
4947+
if (dclIter != erasedCandidates.end())
4948+
{
4949+
continue;
4950+
}
4951+
if (Nodes.second.size() >= GLOBAL_USE_NUM)
4952+
{
4953+
for (auto& LN : Nodes.second)
4954+
{
4955+
G4_INST* inst = LN.Inst;
4956+
Gen4_Operand_Number opNum = LN.OpNum;
4957+
int i = inst->getSrcNum(opNum);
4958+
G4_Operand* src = inst->getSrc(i);
4959+
G4_Operand* tmpOpnd = nullptr;
4960+
4961+
auto itR = replacedOperand.find(dcl);
4962+
if (itR != replacedOperand.end())
4963+
{
4964+
tmpOpnd = builder.duplicateOperand(itR->second);
4965+
}
4966+
else
4967+
{
4968+
tmpOpnd = insertCopyAtBBEntry(bb, inst->getExecSize(), src);
4969+
replacedOperand[dcl] = tmpOpnd;
4970+
}
4971+
inst->setSrc(tmpOpnd, i);
4972+
}
4973+
}
4974+
}
4975+
4976+
return;
4977+
}
4978+
4979+
47924980
// substitute local operands with acc when possible
47934981
void HWConformity::accSubstitution(G4_BB* bb)
47944982
{

visa/HWConformity.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ namespace vISA
8282
G4_Operand* insertMovBefore(INST_LIST_ITER it, uint32_t srcNum, G4_Type type, G4_BB *bb,
8383
G4_SubReg_Align tmpAlign = Any);
8484
G4_SrcRegRegion* insertCopyBefore(INST_LIST_ITER it, uint32_t srcNum, G4_SubReg_Align tmpAlign, G4_BB *bb);
85+
G4_SrcRegRegion* insertCopyAtBBEntry(G4_BB* bb, uint8_t newExecSize, G4_Operand* src);
8586
void broadcast(G4_BB* bb, INST_LIST_ITER it, int srcPos, G4_SubReg_Align subAlign);
8687

8788
G4_INST *splitInstWithByteDst(G4_INST *expand_op);
@@ -216,7 +217,7 @@ namespace vISA
216217
int getNumAccSubDef() const { return numAccSubDef; }
217218
int getNumAccSubUse() const { return numAccSubUse; }
218219
void accSubstitution(G4_BB* bb);
219-
220+
void localizeForAcc(G4_BB* bb);
220221
};
221222
}
222223
//single entry point for HW conformity checks

visa/Optimizer.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,18 @@ void Optimizer::accSubPostSchedule()
952952
kernel.fg.localDataFlowAnalysis();
953953

954954
HWConformity hwConf(builder, kernel, mem);
955+
956+
if (builder.getOption(vISA_localizationForAccSub))
957+
{
958+
for (auto bb : kernel.fg)
959+
{
960+
hwConf.localizeForAcc(bb);
961+
}
962+
963+
kernel.fg.resetLocalDataFlowData();
964+
kernel.fg.localDataFlowAnalysis();
965+
}
966+
955967
for (auto bb : kernel.fg)
956968
{
957969
hwConf.accSubstitution(bb);
@@ -12404,4 +12416,4 @@ void Optimizer::replaceNoMaskWithAnyhWA()
1240412416
}
1240512417
}
1240612418
}
12407-
}
12419+
}

visa/include/VISAOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ DEF_VISA_OPTION(vISA_EnableSplitVariables, ET_BOOL, "-noSplitVariables", UNUSED
5858
DEF_VISA_OPTION(vISA_ChangeMoveType, ET_BOOL, "-ALTMode", UNUSED, true)
5959
DEF_VISA_OPTION(vISA_accSubstitution, ET_BOOL, "-noAccSub", UNUSED, true)
6060
DEF_VISA_OPTION(vISA_doAccSubAfterSchedule, ET_BOOL, "-accSubPostSchedule", UNUSED, true)
61+
DEF_VISA_OPTION(vISA_localizationForAccSub, ET_BOOL, "-localizeForACC", UNUSED, false)
6162
DEF_VISA_OPTION(vISA_ifCvt, ET_BOOL, "-noifcvt", UNUSED, true)
6263
DEF_VISA_OPTION(vISA_LVN, ET_BOOL, "-nolvn", UNUSED, true)
6364
// only affects acc substitution for now

0 commit comments

Comments
 (0)