@@ -2882,7 +2882,8 @@ void HWConformity::fix64bInst( INST_LIST_ITER iter, G4_BB* bb )
2882
2882
for (int i = 0 , size = G4_Inst_Table[inst->opcode ()].n_srcs ; !uses64BitType && i < size; i++)
2883
2883
{
2884
2884
G4_Operand* src = inst->getSrc (i);
2885
- if (src != NULL && G4_Type_Table[src->getType ()].byteSize == 8 )
2885
+
2886
+ if (src && G4_Type_Table[src->getType ()].byteSize == 8 )
2886
2887
{
2887
2888
uses64BitType = true ;
2888
2889
}
@@ -2897,12 +2898,42 @@ void HWConformity::fix64bInst( INST_LIST_ITER iter, G4_BB* bb )
2897
2898
2898
2899
if (uses64BitType)
2899
2900
{
2900
- #if 0
2901
- //#ifdef DEBUG_VERBOSE_ON
2902
- std::cout << "CHV 64b fix for:\n";
2903
- inst->emit(std::cout);
2904
- std::cout << "\n";
2905
- #endif
2901
+
2902
+ if (builder.no64bitType () && inst->opcode () == G4_mov)
2903
+ {
2904
+ // While the input should not contain any ALU instruction with a 64b type, we may still end up
2905
+ // with 64b moves generated when preparing a send payload (e.g., 64b atomics,
2906
+ // A64 messages). We fix such moves here by breaking them into two 32b moves.
2907
+ // For now, only copy moves are handled.
2908
+ auto dst = inst->getDst ();
2909
+ auto src0 = inst->getSrc (0 );
2910
+ assert (getTypeSize (dst->getType ()) == 8 &&
2911
+ getTypeSize (src0->getType ()) == 8 && " must be copy moves" );
2912
+ assert (src0->isSrcRegRegion () &&
2913
+ (src0->asSrcRegRegion ()->isScalar () ||
2914
+ src0->asSrcRegRegion ()->getRegion ()->isContiguous (inst->getExecSize ())) &&
2915
+ " expect src0 to be scalar or contiguous" );
2916
+ auto src0RR = src0->asSrcRegRegion ();
2917
+ assert (inst->isRawMov () && dst->getHorzStride () == 1 && " expect only copy moves" );
2918
+
2919
+ // first half
2920
+ auto newDst = builder.createDstRegRegion (Direct, dst->getBase (), dst->getRegOff (), dst->getSubRegOff () * 2 ,
2921
+ 2 , Type_UD);
2922
+ auto newSrc = builder.createSrcRegRegion (Mod_src_undef, Direct, src0RR->getBase (), src0RR->getRegOff (),
2923
+ src0RR->getSubRegOff () * 2 , src0RR->isScalar () ? builder.getRegionScalar () : builder.getRegionStride2 (), Type_UD);
2924
+ auto newInst = builder.createInst (nullptr , G4_mov, nullptr , false , inst->getExecSize (), newDst, newSrc, nullptr , inst->getOption ());
2925
+ bb->insert (iter, newInst);
2926
+
2927
+ // second half
2928
+ newDst = builder.createDstRegRegion (Direct, dst->getBase (), dst->getRegOff (), dst->getSubRegOff () * 2 + 1 ,
2929
+ 2 , Type_UD);
2930
+ newSrc = builder.createSrcRegRegion (Mod_src_undef, Direct, src0RR->getBase (), src0RR->getRegOff (),
2931
+ src0RR->getSubRegOff () * 2 + 1 , src0RR->isScalar () ? builder.getRegionScalar () : builder.getRegionStride2 (), Type_UD);
2932
+ newInst = builder.createInst (nullptr , G4_mov, nullptr , false , inst->getExecSize (), newDst, newSrc, nullptr , inst->getOption ());
2933
+ *iter = newInst;
2934
+ return ;
2935
+ }
2936
+
2906
2937
int numSrc = G4_Inst_Table[inst->opcode ()].n_srcs ;
2907
2938
2908
2939
// handle indirect sources first
0 commit comments