@@ -7948,24 +7948,51 @@ INST_LIST_ITER HWConformity::fixMadwInst(INST_LIST_ITER it, G4_BB* bb)
7948
7948
// sat cannot be used at all in the macro sequence
7949
7949
// make the dst GRF-aligned before expanding to macro
7950
7950
if (madwInst->getSaturate () ||
7951
+ dst->getHorzStride () != 1 ||
7951
7952
isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst) ||
7952
7953
!builder.isOpndAligned (dst, getGRFSize ()))
7953
7954
{
7954
- // add a tmp mov
7955
- madwInst->setDest (insertMovAfter (it, dst, dst->getType (), bb, GRFALIGN));
7956
- dst = madwInst->getDst ();
7955
+ // add tmp mov instructions
7956
+ int dstLowGRFNum = (int )std::ceil ((float )(execSize * dst->getExecTypeSize ()) / getGRFSize ());
7957
+ int dstTotalGRFNum = dstLowGRFNum * 2 ;
7958
+
7959
+ G4_Declare* newDstDcl = builder.createTempVar (numEltPerGRF (dst->getType ()) * dstTotalGRFNum, dst->getType (), GRFALIGN);
7960
+
7961
+ // add a tmp mov for low results in dst
7962
+ G4_Declare* lowMovSrcDcl = builder.createTempVar (numEltPerGRF (dst->getType ()) * dstLowGRFNum, dst->getType (), GRFALIGN);
7963
+ lowMovSrcDcl->setAliasDeclare (newDstDcl, 0 );
7964
+ G4_SrcRegRegion* lowMovSrc = builder.createSrcRegRegion (lowMovSrcDcl, builder.getRegionStride1 ());
7965
+ auto dstLow = builder.createDst (dst->getBase (), dst->getRegOff (), dst->getSubRegOff (), dst->getHorzStride (), dst->getType ());
7966
+ G4_INST* lowMovInst = builder.createMov (execSize, dstLow, lowMovSrc, madwInst->getMaskOption (), false );
7967
+ lowMovInst->setPredicate (madwInst->getPredicate ());
7968
+ lowMovInst->setSaturate (madwInst->getSaturate ());
7969
+ auto insertIter = bb->insertAfter (it, lowMovInst);
7970
+ maintainDU4TempMov (madwInst, lowMovInst);
7971
+
7972
+ // add a tmp mov for high results in dst
7973
+ G4_Declare* hiMovSrcDcl = builder.createTempVar (numEltPerGRF (dst->getType ()) * dstLowGRFNum, dst->getType (), GRFALIGN);
7974
+ hiMovSrcDcl->setAliasDeclare (newDstDcl, dstLowGRFNum * getGRFSize ());
7975
+ G4_SrcRegRegion* hiMovSrc = builder.createSrcRegRegion (hiMovSrcDcl, builder.getRegionStride1 ());
7976
+ auto dstHi = builder.createDst (dst->getBase (), dst->getRegOff () + dstLowGRFNum, dst->getSubRegOff (), dst->getHorzStride (), dst->getType ());
7977
+ G4_INST* hiMovInst = builder.createMov (execSize, dstHi, hiMovSrc, madwInst->getMaskOption (), false );
7978
+ hiMovInst->setPredicate (madwInst->getPredicate ());
7979
+ hiMovInst->setSaturate (madwInst->getSaturate ());
7980
+ bb->insertAfter (insertIter, hiMovInst);
7981
+ maintainDU4TempMov (madwInst, hiMovInst);
7982
+
7983
+ G4_DstRegRegion* newDst = builder.createDstRegRegion (newDstDcl, 1 );
7984
+ madwInst->setDest (newDst);
7985
+ madwInst->setPredicate (nullptr );
7986
+ madwInst->setSaturate (g4::NOSAT);
7987
+ dst = newDst;
7957
7988
}
7958
7989
7959
- // G4_Type tmpType = (IS_UNSIGNED_INT(src0->getType()) && IS_UNSIGNED_INT(src1->getType()) && IS_UNSIGNED_INT(src2->getType())) ? Type_UD : Type_D;
7960
7990
INST_LIST_ITER retIter = it;
7961
7991
if (builder.noMulOrMadwExpandingBeforeScheduler () && builder.getOption (vISA_expandMadwPostSchedule))
7962
7992
{
7963
7993
// Here just create tmp variables to fix srcMod, cond modifier, saturate, etc. And Madw->Mul+Mach+Addc+Add expanding
7964
7994
// will be done in expandMadwPostSchedule pass.
7965
7995
7966
- // sat has been resolved above, here just set it as NOSAT
7967
- madwInst->setSaturate (g4::NOSAT);
7968
-
7969
7996
// need extra mov if dst is acc and src0 is indirect
7970
7997
if (!builder.accDstforIndirectSrc ())
7971
7998
{
0 commit comments