@@ -8556,9 +8556,6 @@ void HWConformity::fixUnalignedRegions(INST_LIST_ITER it, G4_BB* bb)
8556
8556
}
8557
8557
}
8558
8558
8559
- // emulate mov HF BF8
8560
- // with
8561
- // shl UW UB 8
8562
8559
bool HWConformity::fixFcvt (INST_LIST_ITER i, G4_BB* bb)
8563
8560
{
8564
8561
G4_INST* inst = *i;
@@ -8567,129 +8564,93 @@ bool HWConformity::fixFcvt(INST_LIST_ITER i, G4_BB* bb)
8567
8564
return false ;
8568
8565
}
8569
8566
8570
- if (inst->getDst ()->getType () == Type_UB)
8567
+ // Format conversion allowed between fp16 and fp8 operands in the following cases:
8568
+ // 1. Execution size must not be 1.
8569
+ // 2. fp8 operand is packed.
8570
+ // 3. Source and destination register offset is restricted to 0 (GRF aligned).
8571
+ if (inst->getDst ()->getType () == Type_UB || inst->getSrc (0 )->getType () == Type_UB)
8571
8572
{
8572
- assert ((inst->getSrc (0 )->getType () == Type_HF) &&
8573
- " Only HF->BF8 conversion is supported" );
8573
+ assert (((inst->getDst ()->getType () == Type_UB && inst->getSrc (0 )->getType () == Type_HF)
8574
+ || (inst->getSrc (0 )->getType () == Type_UB && inst->getDst ()->getType () == Type_HF)) &&
8575
+ " Only BF8<->HF conversion is supported" );
8574
8576
assert (!inst->getPredicate () && !inst->getCondMod () && !inst->getSaturate () &&
8575
- " HF->BF8 move does not support pred/cond mod/sat" );
8577
+ " BF8<->HF move does not support pred/cond mod/sat" );
8576
8578
assert (inst->getSrc (0 )->isSrcRegRegion () &&
8577
- " HF->BF8 currently supports non-imm source only" );
8578
- assert (inst->getSrc (0 )->asSrcRegRegion ()->getRegAccess () == Direct &&
8579
+ " HF< ->BF8 currently supports non-imm source only" );
8580
+ assert (inst->getSrc (0 )->isSrcRegRegion () && inst-> getSrc ( 0 )-> asSrcRegRegion ()->getRegAccess () == Direct &&
8579
8581
inst->getSrc (0 )->asSrcRegRegion ()->getModifier () == Mod_src_undef &&
8580
- " HF->BF8 move does not support source modifier" );
8582
+ " BF8<->HF move does not support source modifier" );
8581
8583
8582
- // fix regioning <0;1,0> to <1;1,0> for execution sizes higher than 1.
8583
- if (inst->getSrc (0 )->asSrcRegRegion ()->getRegion ()->isScalar () &&
8584
- inst->getExecSize () != g4::SIMD1)
8585
- {
8586
- inst->getSrc (0 )->asSrcRegRegion ()->setRegion (builder.getRegionStride1 ());
8587
- inst->setSrc (insertMovBefore (i, 0 , inst->getSrc (0 )->getType (), bb, ThirtyTwo_Word), 0 );
8588
- INST_LIST_ITER newMovIter = i;
8589
- newMovIter--;
8590
- G4_INST* newMovInst = *newMovIter;
8591
- newMovInst->getSrc (0 )->asSrcRegRegion ()->setRegion (builder.getRegionScalar ());
8592
- }
8593
- assert (inst->getSrc (0 )->asSrcRegRegion ()->getRegion ()->isContiguous (inst->getExecSize ()) &&
8594
- " HF->BF8 only support <1;1,0> regioning" );
8595
- if (inst->getDst ()->getHorzStride () != 1 )
8584
+ if (!inst->getSrc (0 )->asSrcRegRegion ()->checkGRFAlign () || // case 3
8585
+ (inst->getSrc (0 )->getType () == Type_UB && !inst->getSrc (0 )->asSrcRegRegion ()->getRegion ()->isContiguous (inst->getExecSize ()))) // case 2
8596
8586
{
8597
- replaceDst (i, inst->getDst ()->getType (), ThirtyTwo_Word);
8598
- INST_LIST_ITER newMovIter = i;
8599
- newMovIter++;
8600
- G4_INST* newMovInst = *newMovIter;
8601
- newMovInst->getSrc (0 )->asSrcRegRegion ()->setType (Type_UB);
8602
- newMovInst->getDst ()->asDstRegRegion ()->setType (Type_UB);
8587
+ inst->setSrc (insertMovBefore (i, 0 , inst->getSrc (0 )->getType (), bb, GRFALIGN), 0 );
8588
+ G4_INST* newMovInst = *(std::prev (i));
8589
+ if (newMovInst->getSrc (0 )->getType () == Type_HF)
8590
+ {
8591
+ newMovInst->getSrc (0 )->asSrcRegRegion ()->setType (Type_UW);
8592
+ newMovInst->getDst ()->asDstRegRegion ()->setType (Type_UW);
8593
+ }
8594
+ newMovInst->getDst ()->setHorzStride (1 );
8603
8595
if (inst->getExecSize () != g4::SIMD1)
8604
8596
{
8605
- newMovInst ->getSrc (0 )->asSrcRegRegion ()->setRegion (builder.getRegionStride1 ());
8597
+ inst ->getSrc (0 )->asSrcRegRegion ()->setRegion (builder.getRegionStride1 ());
8606
8598
}
8607
- inst->getDst ()->setHorzStride (1 );
8608
8599
inst->setOptionOn (InstOpt_WriteEnable);
8609
8600
}
8610
- if (!builder.isOpndAligned (inst->getDst (), 64 ) ||
8611
- !inst->isWriteEnableInst ())
8612
- {
8613
- replaceDst (i, inst->getDst ()->getType (), ThirtyTwo_Word);
8614
- INST_LIST_ITER newMovIter = i;
8615
- newMovIter++;
8616
- G4_INST* newMovInst = *newMovIter;
8617
- newMovInst->getSrc (0 )->asSrcRegRegion ()->setType (Type_UB);
8618
- newMovInst->getDst ()->asDstRegRegion ()->setType (Type_UB);
8619
- inst->setOptionOn (InstOpt_WriteEnable);
8620
- }
8621
- if (!builder.isOpndAligned (inst->getSrc (0 ), 64 ))
8601
+
8602
+ // case 1.1: SIMD1 hf->bf8
8603
+ // (W) mov (1|M0) r10.0<1>:bf8 r12.0<0;1,0>:hf
8604
+ // =>
8605
+ // (W) mov (2|M0) r20.0<1>:bf8 r12.0<0;1,0>:hf
8606
+ // (W) mov (1|M0) r10.0<1>:ub r20.0<0;1,0>:ub
8607
+ if (inst->getExecSize () == g4::SIMD1 && inst->getDst ()->getType () == Type_UB) // case 1.1
8622
8608
{
8623
- inst->setSrc (insertMovBefore (i, 0 , inst->getSrc (0 )->getType (), bb, ThirtyTwo_Word), 0 );
8609
+ G4_Declare* dcl = builder.createTempVar (2 , Type_UB, GRFALIGN);
8610
+ G4_SrcRegRegion* srcRegion = builder.createSrcRegRegion (dcl, builder.getRegionScalar ());
8611
+ uint32_t newOption = InstOpt_WriteEnable | inst->getMaskOption ();
8612
+ G4_INST* newMovInst = builder.createMov (g4::SIMD1, inst->getDst (), srcRegion, newOption, false );
8613
+ bb->insertAfter (i, newMovInst);
8614
+
8615
+ G4_DstRegRegion* newDst = builder.createDstRegRegion (dcl, 1 );
8616
+ inst->setDest (newDst);
8617
+ inst->setExecSize (g4::SIMD2);
8624
8618
}
8625
- return true ;
8626
- }
8627
8619
8628
- if (inst->getSrc (0 )->getType () == Type_UB)
8629
- {
8630
- assert ((inst->getDst ()->getType () == Type_HF) &&
8631
- " Only BF8->HF conversion is supported" );
8632
- assert (!inst->getPredicate () && !inst->getCondMod () && !inst->getSaturate () &&
8633
- " BF8->HF move does not support pred/cond mod/sat" );
8634
- // don't support QF imm for now
8635
- assert (inst->getSrc (0 )->isSrcRegRegion () && inst->getSrc (0 )->asSrcRegRegion ()->getRegAccess () == Direct &&
8636
- inst->getSrc (0 )->asSrcRegRegion ()->getModifier () == Mod_src_undef &&
8637
- " BF8->HF move does not support source modifier" );
8638
-
8639
- // fix regioning <0;1,0> to <1;1,0> for execution sizes higher than 1.
8640
- if (inst->getSrc (0 )->asSrcRegRegion ()->getRegion ()->isScalar () &&
8641
- inst->getExecSize () != g4::SIMD1)
8642
- {
8643
- inst->getSrc (0 )->asSrcRegRegion ()->setRegion (builder.getRegionStride1 ());
8644
- inst->setSrc (insertMovBefore (i, 0 , inst->getSrc (0 )->getType (), bb, ThirtyTwo_Word), 0 );
8645
- INST_LIST_ITER newMovIter = i;
8646
- newMovIter--;
8647
- G4_INST* newMovInst = *newMovIter;
8648
- newMovInst->getSrc (0 )->asSrcRegRegion ()->setType (Type_UB);
8649
- newMovInst->getDst ()->asDstRegRegion ()->setType (Type_UB);
8650
- newMovInst->getSrc (0 )->asSrcRegRegion ()->setRegion (builder.getRegionScalar ());
8651
- }
8652
- assert (inst->getSrc (0 )->asSrcRegRegion ()->getRegion ()->isContiguous (inst->getExecSize ()) &&
8653
- " BF8->HF only support <1;1,0> regioning" );
8654
- if (inst->getDst ()->getHorzStride () != 1 )
8620
+ if ((inst->getDst ()->getType () == Type_UB && inst->getDst ()->getHorzStride () != 1 ) || // case 2
8621
+ !inst->getDst ()->checkGRFAlign ()) // case 3
8655
8622
{
8623
+ replaceDst (i, inst->getDst ()->getType (), GRFALIGN);
8624
+ G4_INST* newMovInst = *(std::next (i));
8625
+ if (newMovInst->getDst ()->getType () == Type_HF)
8626
+ {
8627
+ newMovInst->getSrc (0 )->asSrcRegRegion ()->setType (Type_UW);
8628
+ newMovInst->getDst ()->asDstRegRegion ()->setType (Type_UW);
8629
+ }
8656
8630
if (inst->getExecSize () != g4::SIMD1)
8657
8631
{
8658
- replaceDst (i, inst->getDst ()->getType (), ThirtyTwo_Word);
8659
- INST_LIST_ITER newMovIter = i;
8660
- newMovIter++;
8661
- G4_INST* newMovInst = *newMovIter;
8662
8632
newMovInst->getSrc (0 )->asSrcRegRegion ()->setRegion (builder.getRegionStride1 ());
8663
8633
}
8664
8634
inst->getDst ()->setHorzStride (1 );
8635
+ inst->setOptionOn (InstOpt_WriteEnable);
8665
8636
}
8666
- if (!builder.isOpndAligned (inst->getDst (), 64 ))
8667
- {
8668
- replaceDst (i, inst->getDst ()->getType (), ThirtyTwo_Word);
8669
- }
8670
- if (!builder.isOpndAligned (inst->getSrc (0 ), 64 ))
8637
+
8638
+ // case 1.2: SIMD1 bf8->hf
8639
+ // (W) mov (1|M0) r10.0<1>:hf r12.0<0;1,0>:bf8
8640
+ // =>
8641
+ // (W) shl (1|M0) r10.0<1>:uw r12.0<0;1,0>:ub 0x8:uw
8642
+ if (inst->getExecSize () == g4::SIMD1 && inst->getSrc (0 )->getType () == Type_UB)
8671
8643
{
8672
- inst->setSrc (insertMovBefore (i, 0 , inst->getSrc (0 )->getType (), bb, ThirtyTwo_Word), 0 );
8673
- INST_LIST_ITER newMovIter = i;
8674
- newMovIter--;
8675
- G4_INST* newMovInst = *newMovIter;
8676
- newMovInst->getSrc (0 )->asSrcRegRegion ()->setType (Type_UB);
8677
- newMovInst->getDst ()->asDstRegRegion ()->setType (Type_UB);
8644
+ inst->getDst ()->setType (Type_UW);
8645
+ auto newShlInst = builder.createBinOp (G4_shl,
8646
+ inst->getExecSize (), inst->getDst (), inst->getSrc (0 )->asSrcRegRegion (), builder.createImm (8 , Type_UW), inst->getOption (), false );
8647
+ bb->insertBefore (i, newShlInst);
8648
+ bb->erase (i);
8678
8649
}
8679
8650
8680
- inst->getSrc (0 )->asSrcRegRegion ()->setType (Type_UB);
8681
- G4_SrcRegRegion* newSrc0 = inst->getSrc (0 )->asSrcRegRegion ();
8682
-
8683
- inst->getDst ()->setType (Type_UW);
8684
- auto newDst = inst->getDst ();
8685
-
8686
- auto shlInst = builder.createBinOp (G4_shl,
8687
- inst->getExecSize (), newDst, newSrc0, builder.createImm (8 , Type_UW), inst->getOption (), false );
8688
- bb->insertBefore (i, shlInst);
8689
- bb->erase (i);
8690
-
8691
8651
return true ;
8692
8652
}
8653
+
8693
8654
if (inst->getSrc (0 )->getType () == Type_UD)
8694
8655
{
8695
8656
// fcvt a:F b:tf32
@@ -8758,6 +8719,12 @@ void HWConformity::fixByteXBarRestriction(INST_LIST_ITER it, G4_BB* bb)
8758
8719
{
8759
8720
G4_INST* inst = *it;
8760
8721
8722
+ // G4_fcvt should be fixed in fixFcvt()
8723
+ if (inst->opcode () == G4_fcvt)
8724
+ {
8725
+ return ;
8726
+ }
8727
+
8761
8728
if (!inst->getDst () || inst->isSend () || inst->isDpas () ||
8762
8729
inst->getExecSize () == g4::SIMD1)
8763
8730
{
0 commit comments