@@ -242,16 +242,22 @@ bool BUNDLE_INFO::doMerge(IR_Builder &builder,
242
242
// lambda to compute the data type based on how many values we
243
243
// packed into a single value
244
244
auto dataTypeSize = [&](G4_Operand *dst) {
245
+ // size is a power of 2; size is rounded down to the nearest power of 2
246
+ // before performing the merge; for example, a size of 3 is rounded down
247
+ // to a size of 2.
248
+ // size is capped to sizeLimit, which is 4
245
249
uint32_t totalBytes = size * dst->getTypeSize ();
246
250
if (totalBytes < 4 )
247
- return std::make_tuple ( Type_UW, 2 ) ;
251
+ return Type_UW;
248
252
if (totalBytes == 4 )
249
- return std::make_tuple ( Type_UD, 4 ) ;
253
+ return Type_UD;
250
254
if (totalBytes > 4 && totalBytes <= 8 )
251
- return std::make_tuple (Type_UQ, 8 );
252
- else
253
- MUST_BE_TRUE (false , " invalid data type size" );
254
- return std::make_tuple (Type_UNDEF, 0 );
255
+ // check if the platform supports 64bit moves. NOTE(review): no braces are visible around this branch - confirm both returns below are guarded by the size check above; if not, Type_UQ is returned for every size > 4 and the final return is unreachable
256
+ if (builder.noInt64 ()) return Type_UNDEF;
257
+ return Type_UQ;
258
+ // otherwise, the size is bigger than available data types
259
+ // hence, return undefined type
260
+ return Type_UNDEF;
255
261
};
256
262
257
263
// In canMergeSource, we check only on whether the instructions
@@ -263,16 +269,21 @@ bool BUNDLE_INFO::doMerge(IR_Builder &builder,
263
269
// 3. check if mov execution mask is no mask SIMD1
264
270
// 4. check if srcs in bundle are immediate values (handled in
265
271
// canMergeSrc)
266
-
267
- if (!inst[0 ]->isMov ())
268
- return false ;
269
- for (int j = 1 ; j < size; j++) {
270
- if (!inst[j]->isMov () || inst[j - 1 ]->getDst ()->getTopDcl () !=
272
+ // 5. check if destination type is not float (soft constraint, must address)
273
+ // 6. check if the total size of packed data type is less than or equal to
274
+ // largest datatype size (qword).
275
+ for (int j = 0 ; j < size; j++) {
276
+ if (!inst[j]->isMov ()) return false ;
277
+ if (!IS_TYPE_INT (inst[j]->getDst ()->getType ())) return false ;
278
+ if (j > 0 && inst[j - 1 ]->getDst ()->getTopDcl () !=
271
279
inst[j]->getDst ()->getTopDcl ()) {
272
280
return false ;
273
281
}
274
282
}
275
283
284
+ auto packedType = dataTypeSize (newInst->getDst ());
285
+ if (packedType == Type_UNDEF) return false ;
286
+
276
287
// create the packed value
277
288
// since we also create packed values with non-monotonic subregs, the
278
289
// shift amount is subregID * type size (bytes) * 8
@@ -300,9 +311,8 @@ bool BUNDLE_INFO::doMerge(IR_Builder &builder,
300
311
packedVal += ((uint64_t )val << shiftVal);
301
312
}
302
313
303
- auto packedTypeSize = dataTypeSize (newInst->getDst ());
304
- G4_Type packedType = std::get<0 >(packedTypeSize);
305
- unsigned packedSize = std::get<1 >(packedTypeSize);
314
+
315
+ unsigned packedSize = G4_Type_Table[packedType].byteSize ;
306
316
// check alignment
307
317
// if destination alignment is less than the datatype of packed value, we
308
318
// cannot do the coalescing
@@ -315,6 +325,7 @@ bool BUNDLE_INFO::doMerge(IR_Builder &builder,
315
325
316
326
// create a packed type dcl
317
327
G4_Declare *newDcl = builder.createTempVar (0 , packedType, Any, " Packed" );
328
+ newDcl->copyAlign (newInst->getDst ()->getTopDcl ());
318
329
// set the newDcl dcl alias to the instruction destination dcl
319
330
newDcl->setAliasDeclare (newInst->getDst ()->getTopDcl (),
320
331
newInst->getDst ()->getSubRegOff ());
@@ -614,14 +625,20 @@ bool BUNDLE_INFO::canMergeSource(G4_Operand *src, int srcPos,
614
625
// no coalescing of immediate values possible
615
626
return false ;
616
627
} else {
617
- if (builder.getOption (vISA_CoalesceScalarMoves)) {
618
- if (dstPattern == OPND_PATTERN::CONTIGUOUS) {
628
+ // coalescing can only be done for data types less than Q/UQ
629
+ // this is because we are coalescing several moves into a single mov where
630
+ // the src data type is widened; cannot widen beyond Q/UQ
631
+ if (builder.getOption (vISA_CoalesceScalarMoves) &&
632
+ !IS_QTYPE (src->getType ())) {
633
+ if (prevSrc->asImm ()->getImm () == src->asImm ()->getImm ()) {
634
+ // if the values are the same, then favor opportunities for wider SIMD
635
+ // moves to broadcast constant value
636
+ srcPattern[srcPos] = OPND_PATTERN::IDENTICAL;
637
+ } else if (dstPattern == OPND_PATTERN::CONTIGUOUS) {
619
638
// writing immediate values to different subregs of same
620
639
// GRF
621
640
srcPattern[srcPos] = OPND_PATTERN::PACKED;
622
641
} else {
623
- // destination pattern is something other than contiguous
624
- // we cannot do packing in this case
625
642
return false ;
626
643
}
627
644
} else if (prevSrc->asImm ()->getImm () == src->asImm ()->getImm ()) {
0 commit comments