Skip to content

Commit 865b3b6

Browse files
jgu222 authored and igcbot committed
[Autobackout][FuncReg]Revert of change: 0de60dc
Minor improvement to TGL workaround
1 parent 4b90dab commit 865b3b6

File tree

1 file changed

+33
-68
lines changed

1 file changed

+33
-68
lines changed

visa/Optimizer.cpp

Lines changed: 33 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -10581,7 +10581,7 @@ void Optimizer::doNoMaskWA()
1058110581
// flagVar : emask for this BB.
1058210582
// Note that if 32-bit flag is used, flagVar and this instruction I's condMod
1058310583
// take two flag registers, leaving no flag for temporary. In this case, we
10584-
// will do manual spill, ie, save and restore the original flag (case 1.2 and 3).
10584+
// will do manual spill, ie, save and restore the original flag (case 1 and 3).
1058510585
//
1058610586
// Before:
1058710587
// I: (W) cmp (16|M16) (ne)P D .... // 32-bit flag
@@ -10590,15 +10590,11 @@ void Optimizer::doNoMaskWA()
1059010590
//
1059110591
// After:
1059210592
// (1) D = null (common)
10593-
// 1.1) Not simd32 And P is 16-bit modifier (less chance to have flag spill)
10594-
// I: (W) cmp (16|M0) (ne)nP ....
10595-
// I0: (W&flagVar) mov (1|M0) P nP
10596-
// 1.2 general case (save flag into grf to avoid flag spill)
10597-
// I0: (W) mov (1|M0) save:ud P<0;1,0>:ud
10598-
// I: (W) cmp (16|M16) (ne)P ....
10599-
// I1: (W&-flagVar) mov (1|M0) P save:ud
10593+
// I0: (W) mov (1|M0) save:ud P<0;1,0>:ud
10594+
// I: (W) cmp (16|M16) (ne)P ....
10595+
// I1: (W&-flagVar) mov (1|M0) P save:ud
1060010596
// (2) 'I' uses 16-bit flag (common)
10601-
// I0: (W) mov (1) nP<1>:uw flagVar<0;1,0>:uw
10597+
// I0: (W) mov (1) nP<1>:uw flagVar.0<0;1,0>:uw
1060210598
// I: (W&nP) cmp (16|M0) (ne)nP ....
1060310599
// I1: (W&flagVar) mov (1|M0) P<1>:uw nP<0;1,0>:uw
1060410600
// (3) otherwise(less common)
@@ -10629,65 +10625,34 @@ void Optimizer::doNoMaskWA()
1062910625
G4_Type Ty = (modDcl->getWordSize() > 1) ? Type_UD : Type_UW;
1063010626
if (I->hasNULLDst())
1063110627
{ // case 1
10632-
if (flagVar->getDeclare()->getTotalElems() == 1 && Ty == Type_UW)
10633-
{ // case 1.1
10634-
assert(I->getExecSize() != g4::SIMD32);
10635-
10636-
// Use 16-bit flag
10637-
G4_Declare* nPDecl = builder.createTempFlag(1, "nP");
10638-
G4_RegVar* nPVar = nPDecl->getRegVar();
10639-
10640-
G4_CondMod* nM = builder.createCondMod(P->getMod(), nPVar, 0);
10641-
I->setCondMod(nM);
10642-
10643-
auto nextII = currII;
10644-
++nextII;
10645-
10646-
G4_SrcRegRegion* I0S0 = builder.createSrc(nPVar,
10647-
0, 0, builder.getRegionScalar(), Ty);
10648-
G4_DstRegRegion* I0D0 = builder.createDst(
10649-
modDcl->getRegVar(), 0, 0, 1, Ty);
10650-
G4_INST* I0 = builder.createMov(g4::SIMD1, I0D0, I0S0, InstOpt_WriteEnable, false);
10651-
G4_Predicate* flag = builder.createPredicate(
10652-
PredState_Plus, flagVar, 0, getPredCtrl(useAnyh));
10653-
I0->setPredicate(flag);
10654-
flag->setSameAsNoMask(true);
10655-
currBB->insertBefore(nextII, I0);
10656-
10657-
flagVarDefInst->addDefUse(I0, Opnd_pred);
10658-
I->addDefUse(I0, Opnd_src0);
10659-
}
10660-
else
10661-
{ // case 1.2
10662-
G4_Declare* saveDecl = builder.createTempVar(1, Ty, Any, "saveTmp");
10663-
G4_RegVar* saveVar = saveDecl->getRegVar();
10664-
G4_SrcRegRegion* I0S0 = builder.createSrc(
10665-
modDcl->getRegVar(),
10666-
0, 0, builder.getRegionScalar(), Ty);
10667-
G4_DstRegRegion* D0 = builder.createDst(saveVar, 0, 0, 1, Ty);
10668-
G4_INST* I0 = builder.createMov(g4::SIMD1, D0, I0S0, InstOpt_WriteEnable, false);
10669-
currBB->insertBefore(currII, I0);
10670-
10671-
auto nextII = currII;
10672-
++nextII;
10673-
G4_SrcRegRegion* I1S0 = builder.createSrc(saveVar,
10674-
0, 0, builder.getRegionScalar(), Ty);
10675-
G4_DstRegRegion* D1 = builder.createDst(
10676-
modDcl->getRegVar(), 0, 0, 1, Ty);
10677-
G4_INST* I1 = builder.createMov(g4::SIMD1, D1, I1S0, InstOpt_WriteEnable, false);
10678-
G4_Predicate* flag = builder.createPredicate(
10679-
PredState_Minus, flagVar, 0, getPredCtrl(useAnyh));
10680-
I1->setPredicate(flag);
10681-
currBB->insertBefore(nextII, I1);
10682-
10683-
flagVarDefInst->addDefUse(I1, Opnd_pred);
10684-
I0->addDefUse(I1, Opnd_src0);
10685-
10686-
if (!condModGlb)
10687-
{
10688-
// Copy condMod uses to I1.
10689-
I->copyUsesTo(I1, false);
10690-
}
10628+
G4_Declare* saveDecl = builder.createTempVar(1, Ty, Any, "saveTmp");
10629+
G4_RegVar* saveVar = saveDecl->getRegVar();
10630+
G4_SrcRegRegion* I0S0 = builder.createSrc(
10631+
modDcl->getRegVar(),
10632+
0, 0, builder.getRegionScalar(), Ty);
10633+
G4_DstRegRegion* D0 = builder.createDst(saveVar, 0, 0, 1, Ty);
10634+
G4_INST* I0 = builder.createMov(g4::SIMD1, D0, I0S0, InstOpt_WriteEnable, false);
10635+
currBB->insertBefore(currII, I0);
10636+
10637+
auto nextII = currII;
10638+
++nextII;
10639+
G4_SrcRegRegion* I1S0 = builder.createSrc(saveVar,
10640+
0, 0, builder.getRegionScalar(), Ty);
10641+
G4_DstRegRegion* D1 = builder.createDst(
10642+
modDcl->getRegVar(), 0, 0, 1, Ty);
10643+
G4_INST* I1 = builder.createMov(g4::SIMD1, D1, I1S0, InstOpt_WriteEnable, false);
10644+
G4_Predicate* flag = builder.createPredicate(
10645+
PredState_Minus, flagVar, 0, getPredCtrl(useAnyh));
10646+
I1->setPredicate(flag);
10647+
currBB->insertBefore(nextII, I1);
10648+
10649+
flagVarDefInst->addDefUse(I1, Opnd_pred);
10650+
I0->addDefUse(I1, Opnd_src0);
10651+
10652+
if (!condModGlb)
10653+
{
10654+
// Copy condMod uses to I1.
10655+
I->copyUsesTo(I1, false);
1069110656
}
1069210657
return;
1069310658
}

0 commit comments

Comments
 (0)