Skip to content

Commit 364808d

Browse files
jgu222 authored and igcbot committed
Minor improvement to TGL workaround
1 parent aa3f438 commit 364808d

File tree

1 file changed

+64
-30
lines changed

1 file changed

+64
-30
lines changed

visa/Optimizer.cpp

Lines changed: 64 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -10581,7 +10581,7 @@ void Optimizer::doNoMaskWA()
1058110581
// flagVar : emask for this BB.
1058210582
// Note that if 32-bit flag is used, flagVar and this instruction I's condMod
1058310583
// take two flag registers, leaving no flag for temporary. In this case, we
10584-
// will do manual spill, ie, save and restore the original flag (case 1 and 3).
10584+
// will do a manual spill, i.e., save and restore the original flag (cases 1.2 and 3).
1058510585
//
1058610586
// Before:
1058710587
// I: (W) cmp (16|M16) (ne)P D .... // 32-bit flag
@@ -10590,11 +10590,15 @@ void Optimizer::doNoMaskWA()
1059010590
//
1059110591
// After:
1059210592
// (1) D = null (common)
10593-
// I0: (W) mov (1|M0) save:ud P<0;1,0>:ud
10594-
// I: (W) cmp (16|M16) (ne)P ....
10595-
// I1: (W&-flagVar) mov (1|M0) P save:ud
10593+
// 1.1) Not simd32 and P is a 16-bit modifier (less chance of a flag spill)
10594+
// I: (W) cmp (16|M0) (ne)nP ....
10595+
// I0: (W&flagVar) mov (1|M0) P nP
10596+
// 1.2) General case (save flag into GRF to avoid flag spill)
10597+
// I0: (W) mov (1|M0) save:ud P<0;1,0>:ud
10598+
// I: (W) cmp (16|M16) (ne)P ....
10599+
// I1: (W&-flagVar) mov (1|M0) P save:ud
1059610600
// (2) 'I' uses 16-bit flag (common)
10597-
// I0: (W) mov (1) nP<1>:uw flagVar.0<0;1,0>:uw
10601+
// I0: (W) mov (1) nP<1>:uw flagVar<0;1,0>:uw
1059810602
// I: (W&nP) cmp (16|M0) (ne)nP ....
1059910603
// I1: (W&flagVar) mov (1|M0) P<1>:uw nP<0;1,0>:uw
1060010604
// (3) otherwise(less common)
@@ -10625,34 +10629,64 @@ void Optimizer::doNoMaskWA()
1062510629
G4_Type Ty = (modDcl->getWordSize() > 1) ? Type_UD : Type_UW;
1062610630
if (I->hasNULLDst())
1062710631
{ // case 1
10628-
G4_Declare* saveDecl = builder.createTempVar(1, Ty, Any, "saveTmp");
10629-
G4_RegVar* saveVar = saveDecl->getRegVar();
10630-
G4_SrcRegRegion* I0S0 = builder.createSrc(
10631-
modDcl->getRegVar(),
10632-
0, 0, builder.getRegionScalar(), Ty);
10633-
G4_DstRegRegion* D0 = builder.createDst(saveVar, 0, 0, 1, Ty);
10634-
G4_INST* I0 = builder.createMov(g4::SIMD1, D0, I0S0, InstOpt_WriteEnable, false);
10635-
currBB->insertBefore(currII, I0);
10632+
if (flagVar->getDeclare()->getTotalElems() == 1 && Ty == Type_UW)
10633+
{ // case 1.1
10634+
assert(I->getExecSize() != g4::SIMD32);
1063610635

10637-
auto nextII = currII;
10638-
++nextII;
10639-
G4_SrcRegRegion* I1S0 = builder.createSrc(saveVar,
10640-
0, 0, builder.getRegionScalar(), Ty);
10641-
G4_DstRegRegion* D1 = builder.createDst(
10642-
modDcl->getRegVar(), 0, 0, 1, Ty);
10643-
G4_INST* I1 = builder.createMov(g4::SIMD1, D1, I1S0, InstOpt_WriteEnable, false);
10644-
G4_Predicate* flag = builder.createPredicate(
10645-
PredState_Minus, flagVar, 0, getPredCtrl(useAnyh));
10646-
I1->setPredicate(flag);
10647-
currBB->insertBefore(nextII, I1);
10636+
// Use 16-bit flag
10637+
G4_Declare* nPDecl = builder.createTempFlag(1, "nP");
10638+
G4_RegVar* nPVar = nPDecl->getRegVar();
1064810639

10649-
flagVarDefInst->addDefUse(I1, Opnd_pred);
10650-
I0->addDefUse(I1, Opnd_src0);
10640+
G4_CondMod* nM = builder.createCondMod(P->getMod(), nPVar, 0);
10641+
I->setCondMod(nM);
1065110642

10652-
if (!condModGlb)
10653-
{
10654-
// Copy condMod uses to I1.
10655-
I->copyUsesTo(I1, false);
10643+
auto nextII = currII;
10644+
++nextII;
10645+
10646+
G4_SrcRegRegion* I0S0 = builder.createSrc(nPVar,
10647+
0, 0, builder.getRegionScalar(), Ty);
10648+
G4_DstRegRegion* I0D0 = builder.createDst(
10649+
modDcl->getRegVar(), 0, 0, 1, Ty);
10650+
G4_INST* I0 = builder.createMov(g4::SIMD1, I0D0, I0S0, InstOpt_WriteEnable, false);
10651+
G4_Predicate* flag = builder.createPredicate(
10652+
PredState_Plus, flagVar, 0, getPredCtrl(useAnyh));
10653+
I0->setPredicate(flag);
10654+
currBB->insertBefore(nextII, I0);
10655+
10656+
flagVarDefInst->addDefUse(I0, Opnd_pred);
10657+
I->addDefUse(I0, Opnd_src0);
10658+
}
10659+
else
10660+
{ // case 1.2
10661+
G4_Declare* saveDecl = builder.createTempVar(1, Ty, Any, "saveTmp");
10662+
G4_RegVar* saveVar = saveDecl->getRegVar();
10663+
G4_SrcRegRegion* I0S0 = builder.createSrc(
10664+
modDcl->getRegVar(),
10665+
0, 0, builder.getRegionScalar(), Ty);
10666+
G4_DstRegRegion* D0 = builder.createDst(saveVar, 0, 0, 1, Ty);
10667+
G4_INST* I0 = builder.createMov(g4::SIMD1, D0, I0S0, InstOpt_WriteEnable, false);
10668+
currBB->insertBefore(currII, I0);
10669+
10670+
auto nextII = currII;
10671+
++nextII;
10672+
G4_SrcRegRegion* I1S0 = builder.createSrc(saveVar,
10673+
0, 0, builder.getRegionScalar(), Ty);
10674+
G4_DstRegRegion* D1 = builder.createDst(
10675+
modDcl->getRegVar(), 0, 0, 1, Ty);
10676+
G4_INST* I1 = builder.createMov(g4::SIMD1, D1, I1S0, InstOpt_WriteEnable, false);
10677+
G4_Predicate* flag = builder.createPredicate(
10678+
PredState_Minus, flagVar, 0, getPredCtrl(useAnyh));
10679+
I1->setPredicate(flag);
10680+
currBB->insertBefore(nextII, I1);
10681+
10682+
flagVarDefInst->addDefUse(I1, Opnd_pred);
10683+
I0->addDefUse(I1, Opnd_src0);
10684+
10685+
if (!condModGlb)
10686+
{
10687+
// Copy condMod uses to I1.
10688+
I->copyUsesTo(I1, false);
10689+
}
1065610690
}
1065710691
return;
1065810692
}

0 commit comments

Comments
 (0)