@@ -8554,6 +8554,35 @@ void HWConformity::fixUnalignedRegions(INST_LIST_ITER it, G4_BB* bb)
8554
8554
}
8555
8555
}
8556
8556
8557
+ if (builder.hasFtoPackedHFMove () && inst->opcode () == G4_mov)
8558
+ {
8559
+ G4_Operand* src0 = inst->getSrc (0 );
8560
+ G4_Type src0Ty = src0->getType ();
8561
+ G4_SrcRegRegion* reg0 = src0->isSrcRegRegion () ? src0->asSrcRegRegion () : nullptr ;
8562
+ bool src0IsScalar = (!reg0 || reg0->getRegion ()->isScalar ());
8563
+ if (!src0IsScalar &&
8564
+ ((dstTy == Type_HF && dst->getHorzStride () == 1 && src0Ty == Type_F) ||
8565
+ (dstTy == Type_F && src0Ty == Type_HF && reg0->getRegion ()->isContiguous (inst->getExecSize ()))))
8566
+ {
8567
+ uint32_t dstOffBytes = dst->getSubRegOff () * dst->getTypeSize ();
8568
+ uint32_t src0OffBytes = reg0->getSubRegOff () * reg0->getTypeSize ();
8569
+ const uint32_t halfGRFBytes = kernel.numEltPerGRF <Type_UB>() / 2 ;
8570
+ // For an F operand, halve its byte offset so that it is on the same scale as the HF operand's offset.
8571
+ dstOffBytes = (dstTy == Type_F ? dstOffBytes / 2 : dstOffBytes);
8572
+ src0OffBytes = (src0Ty == Type_F ? src0OffBytes / 2 : src0OffBytes);
8573
+ const bool isAligned = (dstOffBytes % halfGRFBytes) == (src0OffBytes % halfGRFBytes);
8574
+ if ((!isAligned && dstOffBytes != 0 ) || (dstTy == Type_F && dst->getHorzStride () != 1 ))
8575
+ {
8576
+ inst->setDest (insertMovAfter (it, dst, dst->getType (), bb, builder.getGRFAlign ()));
8577
+ }
8578
+ if ((!isAligned && src0OffBytes != 0 ) ||
8579
+ (src0Ty == Type_F && !reg0->getRegion ()->isContiguous (inst->getExecSize ())))
8580
+ {
8581
+ inst->setSrc (insertMovBefore (it, 0 , src0Ty, bb, builder.getGRFAlign ()), 0 );
8582
+ }
8583
+ }
8584
+ }
8585
+
8557
8586
// fix Dst if necessary
8558
8587
// some special mix mode dst are allowed provided the instruction has F type:
8559
8588
// r1.0<2>:bf
0 commit comments