@@ -10581,7 +10581,7 @@ void Optimizer::doNoMaskWA()
10581
10581
// flagVar : emask for this BB.
10582
10582
// Note that if 32-bit flag is used, flagVar and this instruction I's condMod
10583
10583
// take two flag registers, leaving no flag for temporary. In this case, we
10584
- // will do manual spill, ie, save and restore the original flag (case 1.2 and 3).
10584
+ // will do manual spill, i.e., save and restore the original flag (cases 1 and 3).
10585
10585
//
10586
10586
// Before:
10587
10587
// I: (W) cmp (16|M16) (ne)P D .... // 32-bit flag
@@ -10590,15 +10590,11 @@ void Optimizer::doNoMaskWA()
10590
10590
//
10591
10591
// After:
10592
10592
// (1) D = null (common)
10593
- // 1.1) Not simd32 And P is 16-bit modifier (less chance to have flag spill)
10594
- // I: (W) cmp (16|M0) (ne)nP ....
10595
- // I0: (W&flagVar) mov (1|M0) P nP
10596
- // 1.2 general case (save flag into grf to avoid flag spill)
10597
- // I0: (W) mov (1|M0) save:ud P<0;1,0>:ud
10598
- // I: (W) cmp (16|M16) (ne)P ....
10599
- // I1: (W&-flagVar) mov (1|M0) P save:ud
10593
+ // I0: (W) mov (1|M0) save:ud P<0;1,0>:ud
10594
+ // I: (W) cmp (16|M16) (ne)P ....
10595
+ // I1: (W&-flagVar) mov (1|M0) P save:ud
10600
10596
// (2) 'I' uses 16-bit flag (common)
10601
- // I0: (W) mov (1) nP<1>:uw flagVar<0;1,0>:uw
10597
+ // I0: (W) mov (1) nP<1>:uw flagVar.0 <0;1,0>:uw
10602
10598
// I: (W&nP) cmp (16|M0) (ne)nP ....
10603
10599
// I1: (W&flagVar) mov (1|M0) P<1>:uw nP<0;1,0>:uw
10604
10600
// (3) otherwise (less common)
@@ -10629,65 +10625,34 @@ void Optimizer::doNoMaskWA()
10629
10625
G4_Type Ty = (modDcl->getWordSize () > 1 ) ? Type_UD : Type_UW;
10630
10626
if (I->hasNULLDst ())
10631
10627
{ // case 1
10632
- if (flagVar->getDeclare ()->getTotalElems () == 1 && Ty == Type_UW)
10633
- { // case 1.1
10634
- assert (I->getExecSize () != g4::SIMD32);
10635
-
10636
- // Use 16-bit flag
10637
- G4_Declare* nPDecl = builder.createTempFlag (1 , " nP" );
10638
- G4_RegVar* nPVar = nPDecl->getRegVar ();
10639
-
10640
- G4_CondMod* nM = builder.createCondMod (P->getMod (), nPVar, 0 );
10641
- I->setCondMod (nM);
10642
-
10643
- auto nextII = currII;
10644
- ++nextII;
10645
-
10646
- G4_SrcRegRegion* I0S0 = builder.createSrc (nPVar,
10647
- 0 , 0 , builder.getRegionScalar (), Ty);
10648
- G4_DstRegRegion* I0D0 = builder.createDst (
10649
- modDcl->getRegVar (), 0 , 0 , 1 , Ty);
10650
- G4_INST* I0 = builder.createMov (g4::SIMD1, I0D0, I0S0, InstOpt_WriteEnable, false );
10651
- G4_Predicate* flag = builder.createPredicate (
10652
- PredState_Plus, flagVar, 0 , getPredCtrl (useAnyh));
10653
- I0->setPredicate (flag);
10654
- flag->setSameAsNoMask (true );
10655
- currBB->insertBefore (nextII, I0);
10656
-
10657
- flagVarDefInst->addDefUse (I0, Opnd_pred);
10658
- I->addDefUse (I0, Opnd_src0);
10659
- }
10660
- else
10661
- { // case 1.2
10662
- G4_Declare* saveDecl = builder.createTempVar (1 , Ty, Any, " saveTmp" );
10663
- G4_RegVar* saveVar = saveDecl->getRegVar ();
10664
- G4_SrcRegRegion* I0S0 = builder.createSrc (
10665
- modDcl->getRegVar (),
10666
- 0 , 0 , builder.getRegionScalar (), Ty);
10667
- G4_DstRegRegion* D0 = builder.createDst (saveVar, 0 , 0 , 1 , Ty);
10668
- G4_INST* I0 = builder.createMov (g4::SIMD1, D0, I0S0, InstOpt_WriteEnable, false );
10669
- currBB->insertBefore (currII, I0);
10670
-
10671
- auto nextII = currII;
10672
- ++nextII;
10673
- G4_SrcRegRegion* I1S0 = builder.createSrc (saveVar,
10674
- 0 , 0 , builder.getRegionScalar (), Ty);
10675
- G4_DstRegRegion* D1 = builder.createDst (
10676
- modDcl->getRegVar (), 0 , 0 , 1 , Ty);
10677
- G4_INST* I1 = builder.createMov (g4::SIMD1, D1, I1S0, InstOpt_WriteEnable, false );
10678
- G4_Predicate* flag = builder.createPredicate (
10679
- PredState_Minus, flagVar, 0 , getPredCtrl (useAnyh));
10680
- I1->setPredicate (flag);
10681
- currBB->insertBefore (nextII, I1);
10682
-
10683
- flagVarDefInst->addDefUse (I1, Opnd_pred);
10684
- I0->addDefUse (I1, Opnd_src0);
10685
-
10686
- if (!condModGlb)
10687
- {
10688
- // Copy condMod uses to I1.
10689
- I->copyUsesTo (I1, false );
10690
- }
10628
+ G4_Declare* saveDecl = builder.createTempVar (1 , Ty, Any, " saveTmp" );
10629
+ G4_RegVar* saveVar = saveDecl->getRegVar ();
10630
+ G4_SrcRegRegion* I0S0 = builder.createSrc (
10631
+ modDcl->getRegVar (),
10632
+ 0 , 0 , builder.getRegionScalar (), Ty);
10633
+ G4_DstRegRegion* D0 = builder.createDst (saveVar, 0 , 0 , 1 , Ty);
10634
+ G4_INST* I0 = builder.createMov (g4::SIMD1, D0, I0S0, InstOpt_WriteEnable, false );
10635
+ currBB->insertBefore (currII, I0);
10636
+
10637
+ auto nextII = currII;
10638
+ ++nextII;
10639
+ G4_SrcRegRegion* I1S0 = builder.createSrc (saveVar,
10640
+ 0 , 0 , builder.getRegionScalar (), Ty);
10641
+ G4_DstRegRegion* D1 = builder.createDst (
10642
+ modDcl->getRegVar (), 0 , 0 , 1 , Ty);
10643
+ G4_INST* I1 = builder.createMov (g4::SIMD1, D1, I1S0, InstOpt_WriteEnable, false );
10644
+ G4_Predicate* flag = builder.createPredicate (
10645
+ PredState_Minus, flagVar, 0 , getPredCtrl (useAnyh));
10646
+ I1->setPredicate (flag);
10647
+ currBB->insertBefore (nextII, I1);
10648
+
10649
+ flagVarDefInst->addDefUse (I1, Opnd_pred);
10650
+ I0->addDefUse (I1, Opnd_src0);
10651
+
10652
+ if (!condModGlb)
10653
+ {
10654
+ // Copy condMod uses to I1.
10655
+ I->copyUsesTo (I1, false );
10691
10656
}
10692
10657
return ;
10693
10658
}
0 commit comments