Skip to content

Commit 6fb96d3

Browse files
fangliu2020igcbot
authored and committed
[IGC vISA] Do not split the instruction if dst has overlap with src when fixing packed byte dst.
To resolve the packed byte dst restriction, vISA splits the instruction into 2 instructions. We should avoid splitting the instruction if dst overlaps with src. Instead, inserting an extra mov can resolve the packed byte dst restriction.
1 parent 521aa40 commit 6fb96d3

File tree

1 file changed

+29
-10
lines changed

1 file changed

+29
-10
lines changed

visa/HWConformity.cpp

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1682,6 +1682,11 @@ bool HWConformity::fixDstAlignment(INST_LIST_ITER i, G4_BB *bb, G4_Type extype,
16821682
return true;
16831683
}
16841684

1685+
if (byteDst && builder.getNativeExecSize() >= g4::SIMD16) {
1686+
// For pvc+, leave byte dst to be fixed in fixByteXBarRestriction() later
1687+
return false;
1688+
}
1689+
16851690
if (builder.hasBFMixMode() && extype == Type_F &&
16861691
inst->getDst()->getType() == Type_BF && !inst->isDpas()) {
16871692
// For now, BF mixed mode should not need this check.
@@ -6516,15 +6521,30 @@ bool HWConformity::splitInstListForByteDst(INST_LIST_ITER it, G4_BB *bb,
65166521
G4_INST *inst = *it;
65176522
G4_opcode inst_op = inst->opcode();
65186523
G4_DstRegRegion *dst = inst->getDst();
6519-
// check if we can split the inst
6520-
if (!canSplitByteDst(inst_op) || inst->getExecSize() == g4::SIMD1 ||
6521-
(!bb->isAllLaneActive() && !inst->isWriteEnableInst()) ||
6522-
dst->getByteOffset() % extypesize != 0 || dst->getHorzStride() != 1 ||
6523-
extypesize != TypeSize(Type_W)) {
6524-
return false;
6524+
6525+
bool hasDstSrcOverlap = false;
6526+
if (dst && !inst->hasNULLDst()) {
6527+
auto srcNum = inst->getNumSrc();
6528+
for (int i = 0; i < srcNum; i++) {
6529+
G4_CmpRelation rel = dst->compareOperand(inst->getSrc(i), builder);
6530+
if (rel != Rel_disjoint) {
6531+
hasDstSrcOverlap = true;
6532+
break;
6533+
}
6534+
}
65256535
}
65266536

6527-
if (inst->getPredicate() || inst->getCondMod()) {
6537+
// check if we can split the inst
6538+
if (!canSplitByteDst(inst_op) ||
6539+
inst->getExecSize() == g4::SIMD1 ||
6540+
(!bb->isAllLaneActive() && !inst->isWriteEnableInst()) ||
6541+
dst->getByteOffset() % extypesize != 0 ||
6542+
dst->getHorzStride() != 1 ||
6543+
extypesize != TypeSize(Type_W) ||
6544+
inst->getPredicate() ||
6545+
inst->getCondMod() ||
6546+
// Do not split the instruction if dst has overlap with sources
6547+
hasDstSrcOverlap) {
65286548
return false;
65296549
}
65306550

@@ -9076,7 +9096,6 @@ void HWConformity::fixByteXBarRestriction(INST_LIST_ITER it, G4_BB *bb) {
90769096
}
90779097
unsigned int new_option = inst->getMaskOption();
90789098
auto pos = it;
9079-
pos++;
90809099
auto dstride = dst->getHorzStride();
90819100
const RegionDesc *shiftRegion = builder.createRegionDesc(dstride, 1, 0);
90829101
G4_Declare *shiftDcl = builder.createTempVar(
@@ -9088,12 +9107,12 @@ void HWConformity::fixByteXBarRestriction(INST_LIST_ITER it, G4_BB *bb) {
90889107
G4_INST *packInst = builder.createMov(inst->getExecSize(), packTmp,
90899108
unpackSrc, new_option, false);
90909109
packInst->setPredicate(pred);
9091-
bb->insertBefore(pos, packInst);
9110+
pos = bb->insertAfter(pos, packInst);
90929111
// then shift the bytes and words location
90939112
G4_INST *shiftInst = builder.createMov(inst->getExecSize(), dst, shiftSrc,
90949113
new_option, false);
90959114
shiftInst->setPredicate(pred);
9096-
bb->insertBefore(pos, shiftInst);
9115+
pos = bb->insertAfter(pos, shiftInst);
90979116
// update propagation info
90989117
maintainDU4TempMov(inst, shiftInst);
90999118
// change the destination of the original instruction

0 commit comments

Comments
 (0)