@@ -5637,29 +5637,30 @@ void HWConformity::conformBB(G4_BB* bb)
5637
5637
}
5638
5638
5639
5639
//
5640
- // SIMD16 addc/subb are illegal on GEN, since they write to acc and there are only 8 acc
5641
- // channels for D/UD type. In vISA IR we should get something like
5642
- // addc (16) V0 V2 V3
5643
- // mov (16) V1 acc0<8;8,1> :ud
5640
+ // SIMD16 addc/subb are illegal on GEN, since they write to acc and there are
5641
+ // only 8 acc channels for D/UD type. In vISA IR we should get something like
5642
+ // addc (16|M0 ) V0 V2 V3
5643
+ // use (16|M0 ) V1 ... acc0:ud // or :d
5644
5644
// which needs to be translated to
5645
- // addc (8) V0(0) V2(0) V3(0) {Q1}
5646
- // mov (8) V1(0) acc0<8;8,1>:ud {Q1}
5647
- // addc (8) V0(1) V2(1) V3(1) {Q2}
5648
- // mov (8) V1(1) acc0<8;8,1>:ud {Q2}
5645
+ // addc (8|M0) V0(0) V2(0) V3(0)
5646
+ // use (8|M0) V1(0) ... acc0:ud
5647
+ // addc (8|M8) V0(1) V2(1) V3(1)
5648
+ // use (8|M8) V1(1) ... acc0:ud
5649
+ // NOTE: we also support other consumers such as add.
5650
+ //
5649
5651
//
5650
5652
// We do this first thing in HW conformity to avoid REXES from splitting addc/subb incorrectly
5651
5653
// We also count on previous opt to preserve the inst pair by not inserting any acc using inst in between;
5652
5654
// it should hopefully be the case since we generally don't optimize instructions with acc src/dst
5653
5655
//
5654
5656
// If exec size of addc is < 8, we also have to make sure both the addc's dst and the carry move's dst are
5655
5657
// GRF-aligned, since acc's channel is dependent on the dst's subreg offset. In other words, we fix
5656
- // addc (1) r1.0 ...
5657
- // mov (1) r1.1 acc0.0<0;1,0>
5658
+ // addc (1) r1.0 ...
5659
+ // mov (1) r1.1 acc0.0<0;1,0>
5658
5660
// into
5659
- // addc (1) r1.0 ...
5660
- // mov (1) r2.0 acc0.0<0;1,0>
5661
- // mov (1) r1.1 r2.0
5662
- //
5661
+ // addc (1) r1.0 ...
5662
+ // mov (1) r2.0 acc0.0<0;1,0>
5663
+ // mov (1) r1.1 r2.0
5663
5664
//
5664
5665
bool HWConformity::fixAddcSubb (G4_BB* bb)
5665
5666
{
@@ -5672,40 +5673,59 @@ bool HWConformity::fixAddcSubb(G4_BB* bb)
5672
5673
inst->getExecSize () != builder.getNativeExecSize ())
5673
5674
{
5674
5675
// find the matching carry consumer (the next instruction that reads acc as a source)
5675
- G4_INST* carryMov = nullptr ;
5676
- auto movIter = iter;
5677
- for (++movIter; movIter != iterEnd; ++movIter )
5676
+ G4_INST* carryUse = nullptr ;
5677
+ auto srchIter = iter;
5678
+ for (++srchIter; srchIter != iterEnd; ++srchIter )
5678
5679
{
5679
- G4_INST* inst2 = *movIter;
5680
- if (inst2->opcode () == G4_mov && inst2->getExecSize () == inst->getExecSize () &&
5681
- inst2->getSrc (0 )->isAccReg () && inst2->getSrc (0 )->getType () == Type_UD)
5680
+ G4_INST* inst2 = *srchIter;
5681
+ auto op = inst2->opcode ();
5682
+
5683
+ bool opPossibleConsumer =
5684
+ op == G4_mov || op == G4_add || op == G4_addc ||
5685
+ op == G4_mad || op == G4_pseudo_mad;
5686
+
5687
+ auto srcUsesAcc = [&] (int srcIx) {
5688
+ if (srcIx >= inst2->getNumSrc ())
5689
+ return false ;
5690
+ auto type = inst2->getSrc (srcIx)->getType ();
5691
+ return inst2->getSrc (srcIx)->isAccReg () &&
5692
+ (type == Type_UD || type == Type_D);
5693
+ };
5694
+
5695
+ // only check for a handful of user instructions
5696
+ // this list could be extended
5697
+ if (opPossibleConsumer &&
5698
+ inst2->getExecSize () == inst->getExecSize () &&
5699
+ (srcUsesAcc (0 ) || srcUsesAcc (1 ) || srcUsesAcc (1 )))
5682
5700
{
5683
- carryMov = inst2;
5701
+ carryUse = inst2;
5684
5702
break ;
5685
5703
}
5686
5704
else if (inst2->useAcc ())
5687
5705
{
5706
+ // someone redefines acc0; we can stop looking
5688
5707
break ;
5689
5708
}
5690
5709
}
5691
5710
5692
- if (carryMov == NULL )
5711
+ if (carryUse == NULL )
5693
5712
{
5694
5713
// can't find the instruction consuming acc, skip this addc/subb
5695
- assert (false && " expect a carry move instruction " );
5714
+ assert (false && " unable to find addc/subc consumer " );
5696
5715
continue ;
5697
5716
}
5698
5717
5699
5718
if (inst->getExecSize () > builder.getNativeExecSize ())
5700
5719
{
5720
+ // we're breaking a bigger instruction into a smaller one
5701
5721
evenlySplitInst (iter, bb);
5702
- evenlySplitInst (movIter , bb);
5722
+ evenlySplitInst (srchIter , bb);
5703
5723
5704
- // movIter now points to the second half of move, and we want to move the first move to be
5724
+ // srchIter now points to the second half of the consumer, and we want to move its first half to be
5705
5725
// before the second half of the addc/subb, which is pointed by iter
5706
- --movIter ;
5707
- G4_INST* mov1 = *movIter ;
5708
- bb->erase (movIter );
5726
+ --srchIter ;
5727
+ G4_INST* mov1 = *srchIter ;
5728
+ bb->erase (srchIter );
5709
5729
bb->insertBefore (iter, mov1);
5710
5730
5711
5731
changed = true ;
@@ -5721,10 +5741,10 @@ bool HWConformity::fixAddcSubb(G4_BB* bb)
5721
5741
insertMovAfter (iter, inst->getDst (), inst->getDst ()->getType (), bb));
5722
5742
changed = true ;
5723
5743
}
5724
- if (!builder.isOpndAligned (carryMov ->getDst (), 32 ))
5744
+ if (!builder.isOpndAligned (carryUse ->getDst (), 32 ))
5725
5745
{
5726
- carryMov ->setDest (
5727
- insertMovAfter (movIter, carryMov ->getDst (), carryMov ->getDst ()->getType (), bb));
5746
+ carryUse ->setDest (
5747
+ insertMovAfter (srchIter, carryUse ->getDst (), carryUse ->getDst ()->getType (), bb));
5728
5748
changed = true ;
5729
5749
}
5730
5750
}
0 commit comments