@@ -242,22 +242,16 @@ bool BUNDLE_INFO::doMerge(IR_Builder &builder,
242
242
// lambda to compute the data type based on how many values we
243
243
// packed into a single value
244
244
auto dataTypeSize = [&](G4_Operand *dst) {
245
- // size is a power of 2; size is round down to the lowest power of 2
246
- // before performing the merge; for example, a size of 3 is round down
247
- // to size of 2.
248
- // size is capped to sizeLimit, which is 4
249
245
uint32_t totalBytes = size * dst->getTypeSize ();
250
246
if (totalBytes < 4 )
251
- return Type_UW;
247
+ return std::make_tuple ( Type_UW, 2 ) ;
252
248
if (totalBytes == 4 )
253
- return Type_UD;
249
+ return std::make_tuple ( Type_UD, 4 ) ;
254
250
if (totalBytes > 4 && totalBytes <= 8 )
255
- // check if the platform supports 64bit moves
256
- if (builder.noInt64 ()) return Type_UNDEF;
257
- return Type_UQ;
258
- // otherwise, the size is bigger than available data types
259
- // hence, return undefined type
260
- return Type_UNDEF;
251
+ return std::make_tuple (Type_UQ, 8 );
252
+ else
253
+ MUST_BE_TRUE (false , " invalid data type size" );
254
+ return std::make_tuple (Type_UNDEF, 0 );
261
255
};
262
256
263
257
// In canMergeSource, we check only on whether the instructions
@@ -269,21 +263,16 @@ bool BUNDLE_INFO::doMerge(IR_Builder &builder,
269
263
// 3. check if mov execution mask is no mask SIMD1
270
264
// 4. check if srcs in bundle are immediate values (handled in
271
265
// canMergeSrc)
272
- // 5. check if destination type is not float (soft constraint, must address)
273
- // 6. check is the total size of packed data type is less than equal to
274
- // largest datatype size (qword).
275
- for (int j = 0 ; j < size; j++) {
276
- if (!inst[j]->isMov ()) return false ;
277
- if (!IS_TYPE_INT (inst[j]->getDst ()->getType ())) return false ;
278
- if (j > 0 && inst[j - 1 ]->getDst ()->getTopDcl () !=
266
+
267
+ if (!inst[0 ]->isMov ())
268
+ return false ;
269
+ for (int j = 1 ; j < size; j++) {
270
+ if (!inst[j]->isMov () || inst[j - 1 ]->getDst ()->getTopDcl () !=
279
271
inst[j]->getDst ()->getTopDcl ()) {
280
272
return false ;
281
273
}
282
274
}
283
275
284
- auto packedType = dataTypeSize (newInst->getDst ());
285
- if (packedType == Type_UNDEF) return false ;
286
-
287
276
// create the packed value
288
277
// since we also create packed values with non-motonic subregs, the
289
278
// shift amount is subregID * type size (bytes) * 8
@@ -311,8 +300,9 @@ bool BUNDLE_INFO::doMerge(IR_Builder &builder,
311
300
packedVal += ((uint64_t )val << shiftVal);
312
301
}
313
302
314
-
315
- unsigned packedSize = G4_Type_Table[packedType].byteSize ;
303
+ auto packedTypeSize = dataTypeSize (newInst->getDst ());
304
+ G4_Type packedType = std::get<0 >(packedTypeSize);
305
+ unsigned packedSize = std::get<1 >(packedTypeSize);
316
306
// check alignment
317
307
// if destination alignment is less than the datatype of packed value, we
318
308
// cannot do the coalescing
@@ -325,7 +315,6 @@ bool BUNDLE_INFO::doMerge(IR_Builder &builder,
325
315
326
316
// create a packed type dcl
327
317
G4_Declare *newDcl = builder.createTempVar (0 , packedType, Any, " Packed" );
328
- newDcl->copyAlign (newInst->getDst ()->getTopDcl ());
329
318
// set the newDcl dcl alias to the instruction destination dcl
330
319
newDcl->setAliasDeclare (newInst->getDst ()->getTopDcl (),
331
320
newInst->getDst ()->getSubRegOff ());
@@ -625,20 +614,14 @@ bool BUNDLE_INFO::canMergeSource(G4_Operand *src, int srcPos,
625
614
// no coalescing of immediate values possible
626
615
return false ;
627
616
} else {
628
- // coalescing can only be done for data types less than Q/UQ
629
- // this is because we are coalescing several moves into a single mov where
630
- // the src data type is widened; cannot widen beyond Q/UQ
631
- if (builder.getOption (vISA_CoalesceScalarMoves) &&
632
- !IS_QTYPE (src->getType ())) {
633
- if (prevSrc->asImm ()->getImm () == src->asImm ()->getImm ()) {
634
- // if the values are the same, then favor opportunities for wider SIMD
635
- // moves to broadcast constant value
636
- srcPattern[srcPos] = OPND_PATTERN::IDENTICAL;
637
- } else if (dstPattern == OPND_PATTERN::CONTIGUOUS) {
617
+ if (builder.getOption (vISA_CoalesceScalarMoves)) {
618
+ if (dstPattern == OPND_PATTERN::CONTIGUOUS) {
638
619
// writing immediate values to different subregs of same
639
620
// GRF
640
621
srcPattern[srcPos] = OPND_PATTERN::PACKED;
641
622
} else {
623
+ // destination pattern is something other than contiguous
624
+ // we cannot do packing in this case
642
625
return false ;
643
626
}
644
627
} else if (prevSrc->asImm ()->getImm () == src->asImm ()->getImm ()) {
0 commit comments