Skip to content

Commit debb95d

Browse files
fangliu2020 authored and igcbot committed
[vISA] Fix regioning issues of format conversion between bf8 and hf.
Format conversion between fp16 and fp8 operands is allowed only under the following restrictions: (1) the execution size must not be 1; (2) the fp8 operand is packed; (3) the source and destination register offsets are restricted to 0.
1 parent 6715902 commit debb95d

File tree

1 file changed

+67
-100
lines changed

1 file changed

+67
-100
lines changed

visa/HWConformity.cpp

Lines changed: 67 additions & 100 deletions
Original file line number | Diff line number | Diff line change
@@ -8556,9 +8556,6 @@ void HWConformity::fixUnalignedRegions(INST_LIST_ITER it, G4_BB* bb)
85568556
}
85578557
}
85588558

8559-
// emulate mov HF BF8
8560-
// with
8561-
// shl UW UB 8
85628559
bool HWConformity::fixFcvt(INST_LIST_ITER i, G4_BB* bb)
85638560
{
85648561
G4_INST* inst = *i;
@@ -8567,129 +8564,93 @@ bool HWConformity::fixFcvt(INST_LIST_ITER i, G4_BB* bb)
85678564
return false;
85688565
}
85698566

8570-
if (inst->getDst()->getType() == Type_UB)
8567+
// Format conversion allowed between fp16 and fp8 operands in the following cases:
8568+
// 1, Execution size must not be 1.
8569+
// 2, fp8 operand is packed.
8570+
// 3, Source and destination register offset is restricted to 0 (GRF aligned).
8571+
if (inst->getDst()->getType() == Type_UB || inst->getSrc(0)->getType() == Type_UB)
85718572
{
8572-
assert((inst->getSrc(0)->getType() == Type_HF) &&
8573-
"Only HF->BF8 conversion is supported");
8573+
assert(((inst->getDst()->getType() == Type_UB && inst->getSrc(0)->getType() == Type_HF)
8574+
|| (inst->getSrc(0)->getType() == Type_UB && inst->getDst()->getType() == Type_HF)) &&
8575+
"Only BF8<->HF conversion is supported");
85748576
assert(!inst->getPredicate() && !inst->getCondMod() && !inst->getSaturate() &&
8575-
"HF->BF8 move does not support pred/cond mod/sat");
8577+
"BF8<->HF move does not support pred/cond mod/sat");
85768578
assert(inst->getSrc(0)->isSrcRegRegion() &&
8577-
"HF->BF8 currently supports non-imm source only");
8578-
assert(inst->getSrc(0)->asSrcRegRegion()->getRegAccess() == Direct &&
8579+
"HF<->BF8 currently supports non-imm source only");
8580+
assert(inst->getSrc(0)->isSrcRegRegion() && inst->getSrc(0)->asSrcRegRegion()->getRegAccess() == Direct &&
85798581
inst->getSrc(0)->asSrcRegRegion()->getModifier() == Mod_src_undef &&
8580-
"HF->BF8 move does not support source modifier");
8582+
"BF8<->HF move does not support source modifier");
85818583

8582-
// fix regioning <0;1,0> to <1;1,0> for execution sizes higher than 1.
8583-
if (inst->getSrc(0)->asSrcRegRegion()->getRegion()->isScalar() &&
8584-
inst->getExecSize() != g4::SIMD1)
8585-
{
8586-
inst->getSrc(0)->asSrcRegRegion()->setRegion(builder.getRegionStride1());
8587-
inst->setSrc(insertMovBefore(i, 0, inst->getSrc(0)->getType(), bb, ThirtyTwo_Word), 0);
8588-
INST_LIST_ITER newMovIter = i;
8589-
newMovIter--;
8590-
G4_INST* newMovInst = *newMovIter;
8591-
newMovInst->getSrc(0)->asSrcRegRegion()->setRegion(builder.getRegionScalar());
8592-
}
8593-
assert(inst->getSrc(0)->asSrcRegRegion()->getRegion()->isContiguous(inst->getExecSize()) &&
8594-
"HF->BF8 only support <1;1,0> regioning");
8595-
if (inst->getDst()->getHorzStride() != 1)
8584+
if (!inst->getSrc(0)->asSrcRegRegion()->checkGRFAlign() || //case 3
8585+
(inst->getSrc(0)->getType() == Type_UB && !inst->getSrc(0)->asSrcRegRegion()->getRegion()->isContiguous(inst->getExecSize()))) // case 2
85968586
{
8597-
replaceDst(i, inst->getDst()->getType(), ThirtyTwo_Word);
8598-
INST_LIST_ITER newMovIter = i;
8599-
newMovIter++;
8600-
G4_INST* newMovInst = *newMovIter;
8601-
newMovInst->getSrc(0)->asSrcRegRegion()->setType(Type_UB);
8602-
newMovInst->getDst()->asDstRegRegion()->setType(Type_UB);
8587+
inst->setSrc(insertMovBefore(i, 0, inst->getSrc(0)->getType(), bb, GRFALIGN), 0);
8588+
G4_INST* newMovInst = *(std::prev(i));
8589+
if (newMovInst->getSrc(0)->getType() == Type_HF)
8590+
{
8591+
newMovInst->getSrc(0)->asSrcRegRegion()->setType(Type_UW);
8592+
newMovInst->getDst()->asDstRegRegion()->setType(Type_UW);
8593+
}
8594+
newMovInst->getDst()->setHorzStride(1);
86038595
if (inst->getExecSize() != g4::SIMD1)
86048596
{
8605-
newMovInst->getSrc(0)->asSrcRegRegion()->setRegion(builder.getRegionStride1());
8597+
inst->getSrc(0)->asSrcRegRegion()->setRegion(builder.getRegionStride1());
86068598
}
8607-
inst->getDst()->setHorzStride(1);
86088599
inst->setOptionOn(InstOpt_WriteEnable);
86098600
}
8610-
if (!builder.isOpndAligned(inst->getDst(), 64) ||
8611-
!inst->isWriteEnableInst())
8612-
{
8613-
replaceDst(i, inst->getDst()->getType(), ThirtyTwo_Word);
8614-
INST_LIST_ITER newMovIter = i;
8615-
newMovIter++;
8616-
G4_INST* newMovInst = *newMovIter;
8617-
newMovInst->getSrc(0)->asSrcRegRegion()->setType(Type_UB);
8618-
newMovInst->getDst()->asDstRegRegion()->setType(Type_UB);
8619-
inst->setOptionOn(InstOpt_WriteEnable);
8620-
}
8621-
if (!builder.isOpndAligned(inst->getSrc(0), 64))
8601+
8602+
// case 1.1: SIMD1 hf->bf8
8603+
// (W) mov (1|M0) r10.0<1>:bf8 r12.0<0;1,0>:hf
8604+
// =>
8605+
// (W) mov (2|M0) r20.0<1>:bf8 r12.0<0;1,0>:hf
8606+
// (W) mov (1|M0) r10.0<1>:ub r20.0<0;1,0>:ub
8607+
if (inst->getExecSize() == g4::SIMD1 && inst->getDst()->getType() == Type_UB) //case 1.1
86228608
{
8623-
inst->setSrc(insertMovBefore(i, 0, inst->getSrc(0)->getType(), bb, ThirtyTwo_Word), 0);
8609+
G4_Declare* dcl = builder.createTempVar(2, Type_UB, GRFALIGN);
8610+
G4_SrcRegRegion* srcRegion = builder.createSrcRegRegion(dcl, builder.getRegionScalar());
8611+
uint32_t newOption = InstOpt_WriteEnable | inst->getMaskOption();
8612+
G4_INST* newMovInst = builder.createMov(g4::SIMD1, inst->getDst(), srcRegion, newOption, false);
8613+
bb->insertAfter(i, newMovInst);
8614+
8615+
G4_DstRegRegion* newDst = builder.createDstRegRegion(dcl, 1);
8616+
inst->setDest(newDst);
8617+
inst->setExecSize(g4::SIMD2);
86248618
}
8625-
return true;
8626-
}
86278619

8628-
if (inst->getSrc(0)->getType() == Type_UB)
8629-
{
8630-
assert((inst->getDst()->getType() == Type_HF) &&
8631-
"Only BF8->HF conversion is supported");
8632-
assert(!inst->getPredicate() && !inst->getCondMod() && !inst->getSaturate() &&
8633-
"BF8->HF move does not support pred/cond mod/sat");
8634-
// don't support QF imm for now
8635-
assert(inst->getSrc(0)->isSrcRegRegion() && inst->getSrc(0)->asSrcRegRegion()->getRegAccess() == Direct &&
8636-
inst->getSrc(0)->asSrcRegRegion()->getModifier() == Mod_src_undef &&
8637-
"BF8->HF move does not support source modifier");
8638-
8639-
// fix regioning <0;1,0> to <1;1,0> for execution sizes higher than 1.
8640-
if (inst->getSrc(0)->asSrcRegRegion()->getRegion()->isScalar() &&
8641-
inst->getExecSize() != g4::SIMD1)
8642-
{
8643-
inst->getSrc(0)->asSrcRegRegion()->setRegion(builder.getRegionStride1());
8644-
inst->setSrc(insertMovBefore(i, 0, inst->getSrc(0)->getType(), bb, ThirtyTwo_Word), 0);
8645-
INST_LIST_ITER newMovIter = i;
8646-
newMovIter--;
8647-
G4_INST* newMovInst = *newMovIter;
8648-
newMovInst->getSrc(0)->asSrcRegRegion()->setType(Type_UB);
8649-
newMovInst->getDst()->asDstRegRegion()->setType(Type_UB);
8650-
newMovInst->getSrc(0)->asSrcRegRegion()->setRegion(builder.getRegionScalar());
8651-
}
8652-
assert(inst->getSrc(0)->asSrcRegRegion()->getRegion()->isContiguous(inst->getExecSize()) &&
8653-
"BF8->HF only support <1;1,0> regioning");
8654-
if (inst->getDst()->getHorzStride() != 1)
8620+
if ((inst->getDst()->getType() == Type_UB && inst->getDst()->getHorzStride() != 1) || //case 2
8621+
!inst->getDst()->checkGRFAlign()) // case 3
86558622
{
8623+
replaceDst(i, inst->getDst()->getType(), GRFALIGN);
8624+
G4_INST* newMovInst = *(std::next(i));
8625+
if (newMovInst->getDst()->getType() == Type_HF)
8626+
{
8627+
newMovInst->getSrc(0)->asSrcRegRegion()->setType(Type_UW);
8628+
newMovInst->getDst()->asDstRegRegion()->setType(Type_UW);
8629+
}
86568630
if (inst->getExecSize() != g4::SIMD1)
86578631
{
8658-
replaceDst(i, inst->getDst()->getType(), ThirtyTwo_Word);
8659-
INST_LIST_ITER newMovIter = i;
8660-
newMovIter++;
8661-
G4_INST* newMovInst = *newMovIter;
86628632
newMovInst->getSrc(0)->asSrcRegRegion()->setRegion(builder.getRegionStride1());
86638633
}
86648634
inst->getDst()->setHorzStride(1);
8635+
inst->setOptionOn(InstOpt_WriteEnable);
86658636
}
8666-
if (!builder.isOpndAligned(inst->getDst(), 64))
8667-
{
8668-
replaceDst(i, inst->getDst()->getType(), ThirtyTwo_Word);
8669-
}
8670-
if (!builder.isOpndAligned(inst->getSrc(0), 64))
8637+
8638+
// case 1.2: SIMD1 bf8->hf
8639+
// (W) mov (1|M0) r10.0<1>:hf r12.0<0;1,0>:bf8
8640+
// =>
8641+
// (W) shl (1|M0) r10.0<1>:uw r12.0<0;1,0>:ub 0x8:uw
8642+
if (inst->getExecSize() == g4::SIMD1 && inst->getSrc(0)->getType() == Type_UB)
86718643
{
8672-
inst->setSrc(insertMovBefore(i, 0, inst->getSrc(0)->getType(), bb, ThirtyTwo_Word), 0);
8673-
INST_LIST_ITER newMovIter = i;
8674-
newMovIter--;
8675-
G4_INST* newMovInst = *newMovIter;
8676-
newMovInst->getSrc(0)->asSrcRegRegion()->setType(Type_UB);
8677-
newMovInst->getDst()->asDstRegRegion()->setType(Type_UB);
8644+
inst->getDst()->setType(Type_UW);
8645+
auto newShlInst = builder.createBinOp(G4_shl,
8646+
inst->getExecSize(), inst->getDst(), inst->getSrc(0)->asSrcRegRegion(), builder.createImm(8, Type_UW), inst->getOption(), false);
8647+
bb->insertBefore(i, newShlInst);
8648+
bb->erase(i);
86788649
}
86798650

8680-
inst->getSrc(0)->asSrcRegRegion()->setType(Type_UB);
8681-
G4_SrcRegRegion* newSrc0 = inst->getSrc(0)->asSrcRegRegion();
8682-
8683-
inst->getDst()->setType(Type_UW);
8684-
auto newDst = inst->getDst();
8685-
8686-
auto shlInst = builder.createBinOp(G4_shl,
8687-
inst->getExecSize(), newDst, newSrc0, builder.createImm(8, Type_UW), inst->getOption(), false);
8688-
bb->insertBefore(i, shlInst);
8689-
bb->erase(i);
8690-
86918651
return true;
86928652
}
8653+
86938654
if (inst->getSrc(0)->getType() == Type_UD)
86948655
{
86958656
// fcvt a:F b:tf32
@@ -8758,6 +8719,12 @@ void HWConformity::fixByteXBarRestriction(INST_LIST_ITER it, G4_BB* bb)
87588719
{
87598720
G4_INST* inst = *it;
87608721

8722+
// G4_fcvt should be fixed in fixFcvt()
8723+
if (inst->opcode() == G4_fcvt)
8724+
{
8725+
return;
8726+
}
8727+
87618728
if (!inst->getDst() || inst->isSend() || inst->isDpas() ||
87628729
inst->getExecSize() == g4::SIMD1)
87638730
{

0 commit comments

Comments
 (0)