@@ -2903,6 +2903,8 @@ bool HWConformity::emulate64bMov(INST_LIST_ITER iter, G4_BB *bb) {
2903
2903
2904
2904
if (src0->isSrcRegRegion ()) {
2905
2905
auto src0RR = src0->asSrcRegRegion ();
2906
+ vISA_ASSERT (IS_INT (src0RR->getType ()) && IS_INT (dst->getType ()),
2907
+ " expecting int types on src, dst" );
2906
2908
vISA_ASSERT (src0RR->getModifier () == Mod_src_undef,
2907
2909
" cannot handle saturation" );
2908
2910
@@ -8016,6 +8018,46 @@ uint16_t HWConformity::getSrcStride(G4_SrcRegRegion *src) {
8016
8018
return srcStride;
8017
8019
};
8018
8020
8021
+ void HWConformity::change64bStride2CopyToUD (INST_LIST_ITER it, G4_BB *bb) {
8022
+ G4_INST *inst = *it;
8023
+ G4_Operand *src = inst->getSrc (0 );
8024
+ vISA_ASSERT (src != nullptr && src->isSrcRegRegion (),
8025
+ " source must be a SrcRegRegion" );
8026
+ G4_SrcRegRegion *origSrc = src->asSrcRegRegion ();
8027
+ G4_Type execType = inst->getDst ()->getType ();
8028
+ uint16_t stride = inst->getDst ()->getHorzStride ();
8029
+ short dstRegOff = inst->getDst ()->getRegOff ();
8030
+ short dstSubRegOff = inst->getDst ()->getSubRegOff ();
8031
+
8032
+ vISA_ASSERT (execType == Type_Q || execType == Type_DF,
8033
+ " Only 64b data type support" );
8034
+ execType = Type_UD;
8035
+ dstSubRegOff *= 2 ;
8036
+
8037
+ G4_DstRegRegion *newDst =
8038
+ builder.createDst (inst->getDst ()->getBase (), dstRegOff, dstSubRegOff + 1 ,
8039
+ stride * 2 , execType);
8040
+ G4_SrcRegRegion *newSrc = builder.createSrcRegRegion (
8041
+ origSrc->getModifier (), Direct, origSrc->getBase (), origSrc->getRegOff (),
8042
+ origSrc->getSubRegOff () * 2 + 1 , builder.createRegionDesc (2 , 1 , 0 ),
8043
+ Type_UD);
8044
+ inst->setSrc (newSrc, 0 );
8045
+ inst->setDest (newDst);
8046
+
8047
+ G4_DstRegRegion *newDst1 = builder.createDst (
8048
+ inst->getDst ()->getBase (), dstRegOff, dstSubRegOff, stride * 2 , execType);
8049
+ G4_SrcRegRegion *newSrc1 = builder.createSrcRegRegion (
8050
+ origSrc->getModifier (), Direct, origSrc->getBase (), origSrc->getRegOff (),
8051
+ origSrc->getSubRegOff () * 2 , builder.createRegionDesc (2 , 1 , 0 ), Type_UD);
8052
+
8053
+ G4_INST *movInst = builder.createMov (inst->getExecSize (), newDst1, newSrc1,
8054
+ inst->getOption (), false );
8055
+
8056
+ INST_LIST_ITER iter = it;
8057
+ iter++;
8058
+ bb->insertBefore (it, movInst);
8059
+ }
8060
+
8019
8061
// on XeHP_SDV we have to make sure each source element is aligned to each dst
8020
8062
// element for all float/64b inst (packed HF is ok in mixed mode inst). For all
8021
8063
// violating instructions, we align each operand to the execution type for float
@@ -8107,8 +8149,15 @@ void HWConformity::fixUnalignedRegions(INST_LIST_ITER it, G4_BB *bb) {
8107
8149
// for packed 64b copy moves that are not under divergent CF, we can
8108
8150
// change its type to UD
8109
8151
change64bCopyToUD (inst, srcStride / inst->getSrc (0 )->getTypeSize ());
8110
- } else if (srcStride != 0 ) {
8152
+ } else if (isNoMaskInst && inst->getDst ()->getHorzStride () == 2 &&
8153
+ execTyWidth == 8 &&
8154
+ src0RR->getRegion ()->isContiguous (inst->getExecSize ())) {
8155
+ change64bStride2CopyToUD (it, bb);
8156
+ } else if (execTyWidth == 8 && IS_TYPE_INT (dstTy) &&
8157
+ IS_TYPE_INT (src0RR->getType ()) && srcStride != 0 &&
8158
+ !src0RR->isIndirect ()) {
8111
8159
// we can split 64b moves with single source stride into 2UD moves
8160
+ // ToDo: check if this subsumes the previous else if
8112
8161
emulate64bMov (it, bb);
8113
8162
} else {
8114
8163
// a move we don't know how to handle without inserting more moves
0 commit comments