Skip to content

Commit df8a499

Browse files
anikaushikigcbot
authored andcommitted
Corrections to logic for coalescing scalar immediate moves
This PR adds corrections to the logic for coalescing scalar immediate moves. The first correction addresses logic bug obstructing opportunities for coalescing movs with identical immediate values and second correction adds an additional condition to check if the src operand type is not qword.
1 parent 3a6e828 commit df8a499

File tree

2 files changed

+36
-19
lines changed

2 files changed

+36
-19
lines changed

IGC/common/igc_flags.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ DECLARE_IGC_REGKEY(bool, UseLinearScanRA, false, "use Linear Scan
117117
DECLARE_IGC_REGKEY(bool, DisableWriteCombine, false, "Disable write combine. PVC+ only", false)
118118
DECLARE_IGC_REGKEY(bool, Force32bitConstantGEPLowering, false, "Go back to old version of GEP lowering for constant address space. PVC only", false)
119119
DECLARE_IGC_REGKEY(bool, NewSpillCostFunction, false, "Use new spill cost function in VISA RA", false)
120-
DECLARE_IGC_REGKEY(bool, EnableCoalesceScalarMoves, false, "Enable scalar moves to be coalesced into fewer moves", false)
120+
DECLARE_IGC_REGKEY(bool, EnableCoalesceScalarMoves, true, "Enable scalar moves to be coalesced into fewer moves", false)
121121

122122
DECLARE_IGC_GROUP("IGC Optimization")
123123
DECLARE_IGC_REGKEY(bool, AllowMem2Reg, false, "Setting this to true makes IGC run mem2reg even when optimizations are disabled", true)

visa/Passes/MergeScalars.cpp

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -242,16 +242,22 @@ bool BUNDLE_INFO::doMerge(IR_Builder &builder,
242242
// lambda to compute the data type based on how many values we
243243
// packed into a single value
244244
auto dataTypeSize = [&](G4_Operand *dst) {
245+
// size is a power of 2; size is round down to the lowest power of 2
246+
// before performing the merge; for example, a size of 3 is round down
247+
// to size of 2.
248+
// size is capped to sizeLimit, which is 4
245249
uint32_t totalBytes = size * dst->getTypeSize();
246250
if (totalBytes < 4)
247-
return std::make_tuple(Type_UW, 2);
251+
return Type_UW;
248252
if (totalBytes == 4)
249-
return std::make_tuple(Type_UD, 4);
253+
return Type_UD;
250254
if (totalBytes > 4 && totalBytes <= 8)
251-
return std::make_tuple(Type_UQ, 8);
252-
else
253-
MUST_BE_TRUE(false, "invalid data type size");
254-
return std::make_tuple(Type_UNDEF, 0);
255+
// check if the platform supports 64bit moves
256+
if (builder.noInt64()) return Type_UNDEF;
257+
return Type_UQ;
258+
// otherwise, the size is bigger than available data types
259+
// hence, return undefined type
260+
return Type_UNDEF;
255261
};
256262

257263
// In canMergeSource, we check only on whether the instructions
@@ -263,16 +269,21 @@ bool BUNDLE_INFO::doMerge(IR_Builder &builder,
263269
// 3. check if mov execution mask is no mask SIMD1
264270
// 4. check if srcs in bundle are immediate values (handled in
265271
// canMergeSrc)
266-
267-
if (!inst[0]->isMov())
268-
return false;
269-
for (int j = 1; j < size; j++) {
270-
if (!inst[j]->isMov() || inst[j - 1]->getDst()->getTopDcl() !=
272+
// 5. check if destination type is not float (soft constraint, must address)
273+
// 6. check is the total size of packed data type is less than equal to
274+
// largest datatype size (qword).
275+
for (int j = 0; j < size; j++) {
276+
if (!inst[j]->isMov()) return false;
277+
if (!IS_TYPE_INT(inst[j]->getDst()->getType())) return false;
278+
if (j > 0 && inst[j - 1]->getDst()->getTopDcl() !=
271279
inst[j]->getDst()->getTopDcl()) {
272280
return false;
273281
}
274282
}
275283

284+
auto packedType = dataTypeSize(newInst->getDst());
285+
if (packedType == Type_UNDEF) return false;
286+
276287
// create the packed value
277288
// since we also create packed values with non-motonic subregs, the
278289
// shift amount is subregID * type size (bytes) * 8
@@ -300,9 +311,8 @@ bool BUNDLE_INFO::doMerge(IR_Builder &builder,
300311
packedVal += ((uint64_t)val << shiftVal);
301312
}
302313

303-
auto packedTypeSize = dataTypeSize(newInst->getDst());
304-
G4_Type packedType = std::get<0>(packedTypeSize);
305-
unsigned packedSize = std::get<1>(packedTypeSize);
314+
315+
unsigned packedSize = G4_Type_Table[packedType].byteSize;
306316
// check alignment
307317
// if destination alignment is less than the datatype of packed value, we
308318
// cannot do the coalescing
@@ -315,6 +325,7 @@ bool BUNDLE_INFO::doMerge(IR_Builder &builder,
315325

316326
// create a packed type dcl
317327
G4_Declare *newDcl = builder.createTempVar(0, packedType, Any, "Packed");
328+
newDcl->copyAlign(newInst->getDst()->getTopDcl());
318329
// set the newDcl dcl alias to the instruction destination dcl
319330
newDcl->setAliasDeclare(newInst->getDst()->getTopDcl(),
320331
newInst->getDst()->getSubRegOff());
@@ -614,14 +625,20 @@ bool BUNDLE_INFO::canMergeSource(G4_Operand *src, int srcPos,
614625
// no coalescing of immediate values possible
615626
return false;
616627
} else {
617-
if (builder.getOption(vISA_CoalesceScalarMoves)) {
618-
if (dstPattern == OPND_PATTERN::CONTIGUOUS) {
628+
// coalescing can only be done for data types less than Q/UQ
629+
// this is because we are coalescing several moves into a single mov where
630+
// the src data type is widened; cannot widen beyond Q/UQ
631+
if (builder.getOption(vISA_CoalesceScalarMoves) &&
632+
!IS_QTYPE(src->getType())) {
633+
if (prevSrc->asImm()->getImm() == src->asImm()->getImm()) {
634+
// if the values are the same, then favor opportunities for wider SIMD
635+
// moves to broadcast constant value
636+
srcPattern[srcPos] = OPND_PATTERN::IDENTICAL;
637+
} else if (dstPattern == OPND_PATTERN::CONTIGUOUS) {
619638
// writing immediate values to different subregs of same
620639
// GRF
621640
srcPattern[srcPos] = OPND_PATTERN::PACKED;
622641
} else {
623-
// destination pattern is something other than contiguous
624-
// we cannot do packing in this case
625642
return false;
626643
}
627644
} else if (prevSrc->asImm()->getImm() == src->asImm()->getImm()) {

0 commit comments

Comments
 (0)