Skip to content

Commit 261c0f6

Browse files
jgu222 igcbot
authored and committed
Fixed wrong region for packed F->HF
For packed F->HF, src region should be contiguous.
1 parent 95c299f commit 261c0f6

File tree

1 file changed

+29
-0
lines changed

1 file changed

+29
-0
lines changed

visa/HWConformity.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8554,6 +8554,35 @@ void HWConformity::fixUnalignedRegions(INST_LIST_ITER it, G4_BB* bb)
85548554
}
85558555
}
85568556

8557+
if (builder.hasFtoPackedHFMove() && inst->opcode() == G4_mov)
8558+
{
8559+
G4_Operand* src0 = inst->getSrc(0);
8560+
G4_Type src0Ty = src0->getType();
8561+
G4_SrcRegRegion* reg0 = src0->isSrcRegRegion() ? src0->asSrcRegRegion() : nullptr;
8562+
bool src0IsScalar = (!reg0 || reg0->getRegion()->isScalar());
8563+
if (!src0IsScalar &&
8564+
((dstTy == Type_HF && dst->getHorzStride() == 1 && src0Ty == Type_F) ||
8565+
(dstTy == Type_F && src0Ty == Type_HF && reg0->getRegion()->isContiguous(inst->getExecSize()))))
8566+
{
8567+
uint32_t dstOffBytes = dst->getSubRegOff() * dst->getTypeSize();
8568+
uint32_t src0OffBytes = reg0->getSubRegOff() * reg0->getTypeSize();
8569+
const uint32_t halfGRFBytes = kernel.numEltPerGRF<Type_UB>() / 2;
8570+
// For F, use the half of its offset!
8571+
dstOffBytes = (dstTy == Type_F ? dstOffBytes / 2 : dstOffBytes);
8572+
src0OffBytes = (src0Ty == Type_F ? src0OffBytes / 2 : src0OffBytes);
8573+
const bool isAligned = (dstOffBytes % halfGRFBytes) == (src0OffBytes % halfGRFBytes);
8574+
if ((!isAligned && dstOffBytes != 0) || (dstTy == Type_F && dst->getHorzStride() != 1))
8575+
{
8576+
inst->setDest(insertMovAfter(it, dst, dst->getType(), bb, builder.getGRFAlign()));
8577+
}
8578+
if ((!isAligned && src0OffBytes != 0) ||
8579+
(src0Ty == Type_F && !reg0->getRegion()->isContiguous(inst->getExecSize())))
8580+
{
8581+
inst->setSrc(insertMovBefore(it, 0, src0Ty, bb, builder.getGRFAlign()), 0);
8582+
}
8583+
}
8584+
}
8585+
85578586
// fix Dst if necessary
85588587
// some special mix mode dst are allowed provided the instruction has F type:
85598588
// r1.0<2>:bf

0 commit comments

Comments
 (0)