Skip to content

Commit a36a041

Browse files
committed
[IGC vISA] Revert: Fixed the register region not aligned issue for 64-bits data types
1. Fixed the infinite loop when handling an unaligned register region for the :df data type. 2. Code refactoring: removed change64bStride2CopyToUD(), as it can be covered by emulate64bMov().
1 parent bdaea42 commit a36a041

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

visa/HWConformity.cpp

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2903,6 +2903,8 @@ bool HWConformity::emulate64bMov(INST_LIST_ITER iter, G4_BB *bb) {
29032903

29042904
if (src0->isSrcRegRegion()) {
29052905
auto src0RR = src0->asSrcRegRegion();
2906+
vISA_ASSERT(IS_INT(src0RR->getType()) && IS_INT(dst->getType()),
2907+
"expecting int types on src, dst");
29062908
vISA_ASSERT(src0RR->getModifier() == Mod_src_undef,
29072909
"cannot handle saturation");
29082910

@@ -8016,6 +8018,46 @@ uint16_t HWConformity::getSrcStride(G4_SrcRegRegion *src) {
80168018
return srcStride;
80178019
};
80188020

8021+
// Rewrites the 64-bit move at `it` as a pair of UD moves.
//
// The destination must be of type Q or DF and src0 must be a SrcRegRegion
// (both are asserted). The instruction at `it` is modified in place so that
// it writes the UD at sub-register offset (2 * dstSubRegOff + 1) with twice
// the original horizontal stride, reading from UD sub-register offset
// (2 * srcSubRegOff + 1). A second mov covering the UDs at the even offsets
// (2 * dstSubRegOff / 2 * srcSubRegOff) is created with the same exec size
// and options and inserted into `bb` immediately before `it`.
//
// Both new source regions use createRegionDesc(2, 1, 0) and are always
// created as Direct operands.
// NOTE(review): nothing in this function checks that the original source is
// direct or that its region is contiguous; the visible call site tests
// isContiguous() but not isIndirect() -- confirm callers guarantee this.
//
// \param it  iterator to the 64-bit move to split; remains valid and still
//            refers to the (rewritten) original instruction on return.
// \param bb  basic block that owns `it`; receives the additional mov.
void HWConformity::change64bStride2CopyToUD(INST_LIST_ITER it, G4_BB *bb) {
  G4_INST *inst = *it;
  G4_Operand *src = inst->getSrc(0);
  vISA_ASSERT(src != nullptr && src->isSrcRegRegion(),
              "source must be a SrcRegRegion");
  G4_SrcRegRegion *origSrc = src->asSrcRegRegion();

  // Capture everything needed from the original destination up front, so
  // nothing below depends on what inst->getDst() returns after setDest().
  auto origDst = inst->getDst();
  vISA_ASSERT(origDst->getType() == Type_Q || origDst->getType() == Type_DF,
              "Only 64b data type support");
  auto dstBase = origDst->getBase();
  const uint16_t stride = origDst->getHorzStride();
  const short dstRegOff = origDst->getRegOff();
  // Sub-register offsets are expressed in units of the operand type, so the
  // offset doubles when the 64-bit type is reinterpreted as UD.
  short dstSubRegOff = origDst->getSubRegOff();
  dstSubRegOff *= 2;

  // Retarget the existing instruction to the UDs at the odd offsets.
  G4_DstRegRegion *newDst = builder.createDst(
      dstBase, dstRegOff, dstSubRegOff + 1, stride * 2, Type_UD);
  G4_SrcRegRegion *newSrc = builder.createSrcRegRegion(
      origSrc->getModifier(), Direct, origSrc->getBase(), origSrc->getRegOff(),
      origSrc->getSubRegOff() * 2 + 1, builder.createRegionDesc(2, 1, 0),
      Type_UD);
  inst->setSrc(newSrc, 0);
  inst->setDest(newDst);

  // Build the companion mov for the UDs at the even offsets.
  G4_DstRegRegion *newDst1 =
      builder.createDst(dstBase, dstRegOff, dstSubRegOff, stride * 2, Type_UD);
  G4_SrcRegRegion *newSrc1 = builder.createSrcRegRegion(
      origSrc->getModifier(), Direct, origSrc->getBase(), origSrc->getRegOff(),
      origSrc->getSubRegOff() * 2, builder.createRegionDesc(2, 1, 0), Type_UD);

  G4_INST *movInst = builder.createMov(inst->getExecSize(), newDst1, newSrc1,
                                       inst->getOption(), false);

  // The companion mov goes directly in front of the rewritten instruction.
  bb->insertBefore(it, movInst);
}
8060+
80198061
// on XeHP_SDV we have to make sure each source element is aligned to each dst
80208062
// element for all float/64b inst (packed HF is ok in mixed mode inst) For all
80218063
// violating instructions, we align each operand to the execution type for float
@@ -8107,8 +8149,15 @@ void HWConformity::fixUnalignedRegions(INST_LIST_ITER it, G4_BB *bb) {
81078149
// for packed 64b copy moves that are not under divergent CF, we can
81088150
// change its type to UD
81098151
change64bCopyToUD(inst, srcStride / inst->getSrc(0)->getTypeSize());
8110-
} else if (srcStride != 0) {
8152+
} else if (isNoMaskInst && inst->getDst()->getHorzStride() == 2 &&
8153+
execTyWidth == 8 &&
8154+
src0RR->getRegion()->isContiguous(inst->getExecSize())) {
8155+
change64bStride2CopyToUD(it, bb);
8156+
} else if (execTyWidth == 8 && IS_TYPE_INT(dstTy) &&
8157+
IS_TYPE_INT(src0RR->getType()) && srcStride != 0 &&
8158+
!src0RR->isIndirect()) {
81118159
// we can split 64b moves with single source stride into 2UD moves
8160+
// ToDo: check if this subsumes the previous else if
81128161
emulate64bMov(it, bb);
81138162
} else {
81148163
// a move we don't know how to handle without inserting more moves

visa/HWConformity.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@ class HWConformity {
175175

176176
bool isFloatOr64b(G4_INST *inst);
177177
uint16_t getSrcStride(G4_SrcRegRegion *src);
178+
void change64bStride2CopyToUD(INST_LIST_ITER it, G4_BB *bb);
178179
bool fixBFMove(INST_LIST_ITER i, G4_BB *bb);
179180
void fixUnalignedRegions(INST_LIST_ITER it, G4_BB *bb);
180181
bool fixFcvt(INST_LIST_ITER i, G4_BB *bb);

0 commit comments

Comments
 (0)