Skip to content

Commit 2501c62

Browse files
jgu222igcbot
authored and committed
Apply WA on flag spill
Need to apply WA on flag spill.
1 parent 60b7ba2 commit 2501c62

File tree

3 files changed

+80
-8
lines changed

3 files changed

+80
-8
lines changed

visa/G4_IR.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ G4_INST::G4_INST(
241241
srcs[3] = s3;
242242

243243
dead = false;
244+
createdPreRA = false;
244245
implAccSrc = nullptr;
245246
implAccDst = nullptr;
246247

visa/G4_IR.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ class G4_INST
317317
// during optimization, an inst may become redundant and be marked dead
318318
unsigned short dead : 1;
319319
unsigned short evenlySplitInst : 1;
320+
unsigned short createdPreRA : 1; // for NoMaskWA
320321
G4_ExecSize execSize;
321322

322323
BinInst *bin;
@@ -1046,6 +1047,11 @@ typedef struct _SWSBInfo
10461047
// prefer addComment if you don't wish to stomp earlier comments
10471048
void setComments(const std::string& comments);
10481049

1050+
// For NoMaskWA. Set in PreRA WA for all instructions. PostRA WA will
1051+
// apply only to new instructions created by RA.
1052+
// NoMaskWA helper: reports the current value of the createdPreRA bit.
bool getCreatedPreRA() const
{
    return createdPreRA;
}
1053+
// NoMaskWA helper: stores V into the createdPreRA bit.
void setCreatedPreRA(bool V)
{
    createdPreRA = V;
}
1054+
10491055
std::string getComments() const
10501056
{
10511057
auto comments = getMetadata(Metadata::InstComment);

visa/Optimizer.cpp

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7218,7 +7218,8 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
72187218
void Optimizer::HWWorkaround()
72197219
{
72207220
if ((kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_CM) &&
7221-
builder.getJitInfo()->spillMemUsed > 0 && builder.hasFusedEUWA())
7221+
builder.hasFusedEUWA() &&
7222+
(builder.getJitInfo()->spillMemUsed > 0 || builder.getJitInfo()->numFlagSpillStore > 0))
72227223
{
72237224
// For now, do it for CM/VC. Will turn it on for all.
72247225
doNoMaskWA_postRA();
@@ -11917,6 +11918,10 @@ void Optimizer::doNoMaskWA()
1191711918
for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II)
1191811919
{
1191911920
G4_INST* I = *II;
11921+
11922+
// Mark all instructions as created pre-RA so that the post-RA WA does not re-process them
11923+
I->setCreatedPreRA(true);
11924+
1192011925
if (!isCandidateInst(I, fg))
1192111926
{
1192211927
continue;
@@ -12103,21 +12108,48 @@ void Optimizer::doNoMaskWA()
1210312108
// // scratch space spill: SP_GRF_V77_3 from offset[4x32];
1210412109
// (W) send.dc0 (16|M0) null r0 r4 0x80 0x020F1004
1210512110
//
12111+
// For flag spill:
12112+
// Need WA as well due to the following case:
12113+
//
12114+
// After RA:
12115+
// BB_19:
12116+
// (W) mov (1|M0) r34.8<1>:uw f0.1<0;1,0>:uw
12117+
// ...
12118+
// BB_21:
12119+
// (W) mov (1|M0) f1.1<1>:uw r34.8<0;1,0>:uw
12120+
//
12121+
// If BB_19 should be skipped but runs due to this HW bug, r34.8 will be updated
12122+
// with f0.1, which holds an undefined value. Then at BB_21, reading from r34.8 will
12123+
// get a garbage value!
12124+
//
1210612125
// Note this works only for NoMaskWA=2
1210712126
//
1210812127
void Optimizer::doNoMaskWA_postRA()
1210912128
{
1211012129
std::vector<INST_LIST_ITER> NoMaskCandidates;
1211112130
G4_ExecSize simdsize = fg.getKernel()->getSimdSize();
12131+
const bool HasFlagSpill = (builder.getJitInfo()->numFlagSpillStore > 0);
12132+
12133+
auto isCandidate = [&](G4_INST* I) {
12134+
if (I->getCreatedPreRA() || !I->isWriteEnableInst())
12135+
{
12136+
return false;
12137+
}
1211212138

12113-
auto isCandidate = [](G4_INST* I) {
12114-
if (I->isSend() && I->isWriteEnableInst() &&
12115-
I->getPredicate() == nullptr &&
12139+
// If it is global flag spill or global grf spill, need to do WA.
12140+
// For now, global checking is not available
12141+
12142+
// 1. flag spill
12143+
if (HasFlagSpill &&
12144+
I->isMov() && I->getSrc(0) && I->getSrc(0)->isFlag() &&
12145+
I->getExecSize() == g4::SIMD1 && I->getPredicate() == nullptr)
12146+
{
12147+
return true;
12148+
}
12149+
// 2. GRF spill
12150+
if (I->isSend() && I->getPredicate() == nullptr &&
1211612151
(I->getDst() == nullptr || I->getDst()->isNullReg()))
1211712152
{
12118-
// This shall be a spill (write).
12119-
// May check if the spilled var is global. We only need
12120-
// to do WA for global spill!
1212112153
return true;
1212212154
}
1212312155
return false;
@@ -12175,6 +12207,16 @@ void Optimizer::doNoMaskWA_postRA()
1217512207
// (W & f0.0.any16h) send (16|M0) ...
1217612208
// 3. (W) mov (1|M0) f0.0<1>:uw DW0:uw // restore
1217712209
//
12210+
// For flag spill, the sequence is the same as the above except for the case in which
12211+
// the WAFlag is the same as the spilled flag. For example,
12212+
//
12213+
// (W) mov (1|M0) r34.8<1>:uw f0.0<0;1,0>:uw
12214+
//
12215+
// 1. (W) mov (1|M0) DW0:uw f0.0<0;1,0>:uw // save
12216+
// 2. (W) mov (1|M0) f0.0<1>:uw DW1:uw // WARestore
12217+
// (W & f0.0.any16h) mov r34.8<1>:uw DW0.0<0;1,0>:uw
12218+
// 3. (W) mov (1|M0) f0.0<1>:uw DW0:uw // restore
12219+
//
1217812220
// Todo: check if save/restore is needed to avoid redundant save/restore.
1217912221
//
1218012222
G4_Declare* saveTmp = builder.getEUFusionWATmpVar(); // 2DW;
@@ -12215,10 +12257,15 @@ void Optimizer::doNoMaskWA_postRA()
1221512257
// Without optimization, always do save/restore
1221612258
bool needSave = true;
1221712259
bool needRestore = true;
12260+
12261+
// wa flag register to use f(wafregnum, wafsregnum)
12262+
uint32_t wafregnum = 0;
12263+
uint32_t wafsregnum = 0;
12264+
1221812265
G4_Type Ty = (simdsize > 16) ? Type_UD : Type_UW;
1221912266
G4_Declare* flagDcl = builder.createTempFlag((Ty == Type_UW ? 1 : 2), "waflag");
1222012267
G4_RegVar* flagVar = flagDcl->getRegVar();
12221-
flagVar->setPhyReg(builder.phyregpool.getFlagAreg(0), 0);
12268+
flagVar->setPhyReg(builder.phyregpool.getFlagAreg(wafregnum), wafsregnum);
1222212269

1222312270
// Save flag, create WA mask, save WAflag
1222412271
createMov1(BB, WAInsts[0], saveVar, saveOff, flagVar, 0, Ty); // save
@@ -12239,6 +12286,24 @@ void Optimizer::doNoMaskWA_postRA()
1223912286
G4_INST* I = *currII;
1224012287
G4_Predicate* newPred = builder.createPredicate(
1224112288
PredState_Plus, flagVar, 0, waPredCtrl);
12289+
if (I->isMov() && I->getSrc(0) && I->getSrc(0)->isFlag())
12290+
{
12291+
G4_SrcRegRegion* srcReg = I->getSrc(0)->asSrcRegRegion();
12292+
G4_RegVar* baseVar = static_cast<G4_RegVar*>(srcReg->getBase());
12293+
assert(baseVar->isPhyRegAssigned());
12294+
12295+
// For a flag, G4_Areg holds the flag number and G4_RegVar holds the subRegOff.
12296+
// (SrcRegRegion's regOff/subRegOff is always 0/0.)
12297+
G4_Areg* flagReg = baseVar->getPhyReg()->getAreg();
12298+
uint32_t subRegOff = baseVar->getPhyRegOff();
12299+
if (flagReg->getFlagNum() == wafregnum &&
12300+
(Ty == Type_UD /* 32bit flag */ || subRegOff == wafsregnum /* 16bit flag */))
12301+
{
12302+
G4_SrcRegRegion* S = builder.createSrc(
12303+
saveVar, 0, saveOff, builder.getRegionScalar(), Ty);
12304+
I->setSrc(S, 0);
12305+
}
12306+
}
1224212307
I->setPredicate(newPred);
1224312308

1224412309
if (i == (sz - 1) || needRestore) {

0 commit comments

Comments
 (0)