Skip to content

Commit f8c2aad

Browse files
jgu222 igcbot
authored and committed
Apply WA on flag spill
Need to apply WA on flag spill.
1 parent 2dc695b commit f8c2aad

File tree

3 files changed

+80
-8
lines changed

3 files changed

+80
-8
lines changed

visa/G4_IR.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ G4_INST::G4_INST(
241241
srcs[3] = s3;
242242

243243
dead = false;
244+
createdPreRA = false;
244245
implAccSrc = nullptr;
245246
implAccDst = nullptr;
246247

visa/G4_IR.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ class G4_INST
317317
// during optimization, an inst may become redundant and be marked dead
318318
unsigned short dead : 1;
319319
unsigned short evenlySplitInst : 1;
320+
unsigned short createdPreRA : 1; // for NoMaskWA
320321
G4_ExecSize execSize;
321322

322323
BinInst *bin;
@@ -1056,6 +1057,11 @@ typedef struct _SWSBInfo
10561057
// prefer addComment if you don't wish to stomp earlier comments
10571058
void setComments(const std::string& comments);
10581059

1060+
// For NoMaskWA. Set in PreRA WA for all instructions. PostRA WA will
1061+
// apply only to new instructions created by RA.
1062+
bool getCreatedPreRA() const { return createdPreRA; }
1063+
void setCreatedPreRA(bool V) { createdPreRA = V; }
1064+
10591065
std::string getComments() const
10601066
{
10611067
auto comments = getMetadata(Metadata::InstComment);

visa/Optimizer.cpp

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6879,7 +6879,8 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
68796879
void Optimizer::HWWorkaround()
68806880
{
68816881
if ((kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_CM) &&
6882-
builder.getJitInfo()->spillMemUsed > 0 && builder.hasFusedEUWA())
6882+
builder.hasFusedEUWA() &&
6883+
(builder.getJitInfo()->spillMemUsed > 0 || builder.getJitInfo()->numFlagSpillStore > 0))
68836884
{
68846885
// For now, do it for CM/VC. Will turn it on for all.
68856886
doNoMaskWA_postRA();
@@ -11593,6 +11594,10 @@ void Optimizer::doNoMaskWA()
1159311594
for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II)
1159411595
{
1159511596
G4_INST* I = *II;
11597+
11598+
// Mark all instructions as created pre-RA so that the post-RA WA does not re-process them
11599+
I->setCreatedPreRA(true);
11600+
1159611601
if (!isCandidateInst(I, fg))
1159711602
{
1159811603
continue;
@@ -11779,21 +11784,48 @@ void Optimizer::doNoMaskWA()
1177911784
// // scratch space spill: SP_GRF_V77_3 from offset[4x32];
1178011785
// (W) send.dc0 (16|M0) null r0 r4 0x80 0x020F1004
1178111786
//
11787+
// For flag spill:
11788+
// Need WA as well due to the following case:
11789+
//
11790+
// After RA:
11791+
// BB_19:
11792+
// (W) mov (1|M0) r34.8<1>:uw f0.1<0;1,0>:uw
11793+
// ...
11794+
// BB_21:
11795+
// (W) mov (1|M0) f1.1<1>:uw r34.8<0;1,0>:uw
11796+
//
11797+
// If BB_19 should be skipped but runs due to this HW bug, r34.8 will be updated
11798+
// with f0.1, which is an undefined value. And at BB_21, reading from r34.8 will
11799+
// get a garbage value!
11800+
//
1178211801
// Note this works only for NoMaskWA=2
1178311802
//
1178411803
void Optimizer::doNoMaskWA_postRA()
1178511804
{
1178611805
std::vector<INST_LIST_ITER> NoMaskCandidates;
1178711806
G4_ExecSize simdsize = fg.getKernel()->getSimdSize();
11807+
const bool HasFlagSpill = (builder.getJitInfo()->numFlagSpillStore > 0);
11808+
11809+
auto isCandidate = [&](G4_INST* I) {
11810+
if (I->getCreatedPreRA() || !I->isWriteEnableInst())
11811+
{
11812+
return false;
11813+
}
1178811814

11789-
auto isCandidate = [](G4_INST* I) {
11790-
if (I->isSend() && I->isWriteEnableInst() &&
11791-
I->getPredicate() == nullptr &&
11815+
// If it is a global flag spill or a global GRF spill, the WA is needed.
11816+
// For now, global checking is not available
11817+
11818+
// 1. flag spill
11819+
if (HasFlagSpill &&
11820+
I->isMov() && I->getSrc(0) && I->getSrc(0)->isFlag() &&
11821+
I->getExecSize() == g4::SIMD1 && I->getPredicate() == nullptr)
11822+
{
11823+
return true;
11824+
}
11825+
// 2. GRF spill
11826+
if (I->isSend() && I->getPredicate() == nullptr &&
1179211827
(I->getDst() == nullptr || I->getDst()->isNullReg()))
1179311828
{
11794-
// This shall be a spill (write).
11795-
// May check if the spilled var is global. We only need
11796-
// to do WA for global spill!
1179711829
return true;
1179811830
}
1179911831
return false;
@@ -11851,6 +11883,16 @@ void Optimizer::doNoMaskWA_postRA()
1185111883
// (W & f0.0.any16h) send (16|M0) ...
1185211884
// 3. (W) mov (1|M0) f0.0<1>:uw DW0:uw // restore
1185311885
//
11886+
// For flag spill, the sequence is the same as the above except for the case in which
11887+
// the WAFlag is the same as the spilled flag. For example,
11888+
//
11889+
// (W) mov (1|M0) r34.8<1>:uw f0.0<0;1,0>:uw
11890+
//
11891+
// 1. (W) mov (1|M0) DW0:uw f0.0<0;1,0>:uw // save
11892+
// 2. (W) mov (1|M0) f0.0<1>:uw DW1:uw // WARestore
11893+
// (W & f0.0.any16h) mov r34.8<1>:uw DW0.0<0;1,0>:uw
11894+
// 3. (W) mov (1|M0) f0.0<1>:uw DW0:uw // restore
11895+
//
1185411896
// Todo: check if save/restore is needed to avoid redundant save/restore.
1185511897
//
1185611898
G4_Declare* saveTmp = builder.getEUFusionWATmpVar(); // 2DW;
@@ -11891,10 +11933,15 @@ void Optimizer::doNoMaskWA_postRA()
1189111933
// Without optimization, always do save/restore
1189211934
bool needSave = true;
1189311935
bool needRestore = true;
11936+
11937+
// WA flag register to use: f<wafregnum>.<wafsregnum>
11938+
uint32_t wafregnum = 0;
11939+
uint32_t wafsregnum = 0;
11940+
1189411941
G4_Type Ty = (simdsize > 16) ? Type_UD : Type_UW;
1189511942
G4_Declare* flagDcl = builder.createTempFlag((Ty == Type_UW ? 1 : 2), "waflag");
1189611943
G4_RegVar* flagVar = flagDcl->getRegVar();
11897-
flagVar->setPhyReg(builder.phyregpool.getFlagAreg(0), 0);
11944+
flagVar->setPhyReg(builder.phyregpool.getFlagAreg(wafregnum), wafsregnum);
1189811945

1189911946
// Save flag, create WA mask, save WAflag
1190011947
createMov1(BB, WAInsts[0], saveVar, saveOff, flagVar, 0, Ty); // save
@@ -11915,6 +11962,24 @@ void Optimizer::doNoMaskWA_postRA()
1191511962
G4_INST* I = *currII;
1191611963
G4_Predicate* newPred = builder.createPredicate(
1191711964
PredState_Plus, flagVar, 0, waPredCtrl);
11965+
if (I->isMov() && I->getSrc(0) && I->getSrc(0)->isFlag())
11966+
{
11967+
G4_SrcRegRegion* srcReg = I->getSrc(0)->asSrcRegRegion();
11968+
G4_RegVar* baseVar = static_cast<G4_RegVar*>(srcReg->getBase());
11969+
assert(baseVar->isPhyRegAssigned());
11970+
11971+
// For a flag, the G4_Areg holds the flag number and the G4_RegVar holds the sub-register offset.
11971+
// (The SrcRegRegion's regOff/subRegOff are always 0/0.)
11973+
G4_Areg* flagReg = baseVar->getPhyReg()->getAreg();
11974+
uint32_t subRegOff = baseVar->getPhyRegOff();
11975+
if (flagReg->getFlagNum() == wafregnum &&
11976+
(Ty == Type_UD /* 32bit flag */ || subRegOff == wafsregnum /* 16bit flag */))
11977+
{
11978+
G4_SrcRegRegion* S = builder.createSrc(
11979+
saveVar, 0, saveOff, builder.getRegionScalar(), Ty);
11980+
I->setSrc(S, 0);
11981+
}
11982+
}
1191811983
I->setPredicate(newPred);
1191911984

1192011985
if (i == (sz - 1) || needRestore) {

0 commit comments

Comments
 (0)