Skip to content

Commit d27c900

Browse files
weiyu-chen authored and sys_zuul committed
Fix WaDisableSendSrcDstOverlap() for instructions in divergent control flow.
Change-Id: Icaef6ccc031bdfc327b34c61d6b8156b47cacbac
1 parent 0a649e2 commit d27c900

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

visa/HWCaps.inc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -434,7 +434,7 @@
434434
bool WaDisableSendSrcDstOverlap() const
435435
{
436436
return getOption(vISA_noSendSrcDstOverlap) ||
437-
(m_options->getTarget() == VISA_CM && getPlatform() >= GENX_SKL) ||
437+
(m_options->getTarget() == VISA_CM && getPlatform() >= GENX_SKL && getPlatform() < GENX_TGLLP) ||
438438
getPlatform() == GENX_ICLLP;
439439
}
440440

visa/HWConformity.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5840,19 +5840,19 @@ void HWConformity::fixSendInst(G4_BB* bb)
58405840
int dstSize = inst->getMsgDesc()->ResponseLength();
58415841
int src0Size = src0Overlap ? inst->getMsgDesc()->MessageLength() : 0;
58425842
int src1Size = src1Overlap ? inst->getMsgDesc()->extMessageLength() : 0;
5843-
if (dstSize > src0Size + src1Size)
5843+
if (inst->getPredicate() || (bb->isDivergent() && !inst->isWriteEnableInst()) || dstSize > src0Size + src1Size)
58445844
{
5845-
//copy src0/src1
5845+
//copy src0/src1 if inst does not update all channels
58465846
if (src0Overlap)
58475847
{
5848-
G4_Declare* copyDst = builder.createTempVar(src0Size * 8, Type_UD, Any);
5848+
G4_Declare* copyDst = builder.createTempVar(src0Size * NUM_DWORDS_PER_GRF, Type_UD, Any);
58495849
copyRegs(copyDst, 0, inst->getSrc(0)->getBase()->asRegVar()->getDeclare(),
58505850
inst->getSrc(0)->asSrcRegRegion()->getRegOff() * getGRFSize(), src0Size, bb, i);
58515851
inst->setSrc(builder.Create_Src_Opnd_From_Dcl(copyDst, builder.getRegionStride1()), 0);
58525852
}
58535853
if (src1Overlap)
58545854
{
5855-
G4_Declare* copyDst = builder.createTempVar(src1Size * 8, Type_UD, Any);
5855+
G4_Declare* copyDst = builder.createTempVar(src1Size * NUM_DWORDS_PER_GRF, Type_UD, Any);
58565856
copyRegs(copyDst, 0, inst->getSrc(1)->getBase()->asRegVar()->getDeclare(),
58575857
inst->getSrc(1)->asSrcRegRegion()->getRegOff() * getGRFSize(), src1Size, bb, i);
58585858
inst->setSrc(builder.Create_Src_Opnd_From_Dcl(copyDst, builder.getRegionStride1()), 1);
@@ -5863,7 +5863,7 @@ void HWConformity::fixSendInst(G4_BB* bb)
58635863
// copy dst
58645864
auto copyIter = i;
58655865
++copyIter;
5866-
G4_Declare* copySrc = builder.createTempVar(dstSize * 8, Type_UD, Any);
5866+
G4_Declare* copySrc = builder.createTempVar(dstSize * NUM_DWORDS_PER_GRF, Type_UD, Any);
58675867
copyRegs(inst->getDst()->getBase()->asRegVar()->getDeclare(), inst->getDst()->getRegOff() * getGRFSize(),
58685868
copySrc, 0, dstSize, bb, copyIter);
58695869
inst->setDest(builder.Create_Dst_Opnd_From_Dcl(copySrc, 1));

0 commit comments

Comments
 (0)