Skip to content

Commit 9802595

Browse files
fangliu2020 authored and igcbot committed
Fix the madw issue if the dst is not GRF-aligned or the stride is not 1.
Fix the madw issue if the dst is not GRF-aligned or the stride is not 1.
1 parent bdaf8c5 commit 9802595

File tree

1 file changed

+34
-7
lines changed

1 file changed

+34
-7
lines changed

visa/HWConformity.cpp

Lines changed: 34 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7948,24 +7948,51 @@ INST_LIST_ITER HWConformity::fixMadwInst(INST_LIST_ITER it, G4_BB* bb)
79487948
// sat cannot be used at all in the macro sequence
79497949
// make the dst GRF-aligned before expanding to macro
79507950
if (madwInst->getSaturate() ||
7951+
dst->getHorzStride() != 1 ||
79517952
isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst) ||
79527953
!builder.isOpndAligned(dst, getGRFSize()))
79537954
{
7954-
// add a tmp mov
7955-
madwInst->setDest(insertMovAfter(it, dst, dst->getType(), bb, GRFALIGN));
7956-
dst = madwInst->getDst();
7955+
// add tmp mov instructions
7956+
int dstLowGRFNum = (int)std::ceil((float)(execSize * dst->getExecTypeSize()) / getGRFSize());
7957+
int dstTotalGRFNum = dstLowGRFNum * 2;
7958+
7959+
G4_Declare* newDstDcl = builder.createTempVar(numEltPerGRF(dst->getType()) * dstTotalGRFNum, dst->getType(), GRFALIGN);
7960+
7961+
// add a tmp mov for low results in dst
7962+
G4_Declare* lowMovSrcDcl = builder.createTempVar(numEltPerGRF(dst->getType()) * dstLowGRFNum, dst->getType(), GRFALIGN);
7963+
lowMovSrcDcl->setAliasDeclare(newDstDcl, 0);
7964+
G4_SrcRegRegion* lowMovSrc = builder.createSrcRegRegion(lowMovSrcDcl, builder.getRegionStride1());
7965+
auto dstLow = builder.createDst(dst->getBase(), dst->getRegOff(), dst->getSubRegOff(), dst->getHorzStride(), dst->getType());
7966+
G4_INST* lowMovInst = builder.createMov(execSize, dstLow, lowMovSrc, madwInst->getMaskOption(), false);
7967+
lowMovInst->setPredicate(madwInst->getPredicate());
7968+
lowMovInst->setSaturate(madwInst->getSaturate());
7969+
auto insertIter = bb->insertAfter(it, lowMovInst);
7970+
maintainDU4TempMov(madwInst, lowMovInst);
7971+
7972+
// add a tmp mov for high results in dst
7973+
G4_Declare* hiMovSrcDcl = builder.createTempVar(numEltPerGRF(dst->getType()) * dstLowGRFNum, dst->getType(), GRFALIGN);
7974+
hiMovSrcDcl->setAliasDeclare(newDstDcl, dstLowGRFNum * getGRFSize());
7975+
G4_SrcRegRegion* hiMovSrc = builder.createSrcRegRegion(hiMovSrcDcl, builder.getRegionStride1());
7976+
auto dstHi = builder.createDst(dst->getBase(), dst->getRegOff() + dstLowGRFNum, dst->getSubRegOff(), dst->getHorzStride(), dst->getType());
7977+
G4_INST* hiMovInst = builder.createMov(execSize, dstHi, hiMovSrc, madwInst->getMaskOption(), false);
7978+
hiMovInst->setPredicate(madwInst->getPredicate());
7979+
hiMovInst->setSaturate(madwInst->getSaturate());
7980+
bb->insertAfter(insertIter, hiMovInst);
7981+
maintainDU4TempMov(madwInst, hiMovInst);
7982+
7983+
G4_DstRegRegion* newDst = builder.createDstRegRegion(newDstDcl, 1);
7984+
madwInst->setDest(newDst);
7985+
madwInst->setPredicate(nullptr);
7986+
madwInst->setSaturate(g4::NOSAT);
7987+
dst = newDst;
79577988
}
79587989

7959-
//G4_Type tmpType = (IS_UNSIGNED_INT(src0->getType()) && IS_UNSIGNED_INT(src1->getType()) && IS_UNSIGNED_INT(src2->getType())) ? Type_UD : Type_D;
79607990
INST_LIST_ITER retIter = it;
79617991
if (builder.noMulOrMadwExpandingBeforeScheduler() && builder.getOption(vISA_expandMadwPostSchedule))
79627992
{
79637993
// Here just create tmp variables to fix srcMod, cond modifier, saturate, etc. And Madw->Mul+Mach+Addc+Add expanding
79647994
// will be done in expandMadwPostSchedule pass.
79657995

7966-
// sat has bee resolved above, here just set it as NOSAT
7967-
madwInst->setSaturate(g4::NOSAT);
7968-
79697996
// need extra mov if dst is acc and src0 is indirect
79707997
if (!builder.accDstforIndirectSrc())
79717998
{

0 commit comments

Comments
 (0)