Skip to content

Commit 765ac0f

Browse files
fangliu2020 authored and igcbot committed
Enable expandMulPostSchedule pass by default
1 parent 6bb7bd0 commit 765ac0f

File tree

4 files changed

+45
-46
lines changed

4 files changed

+45
-46
lines changed

visa/HWConformity.cpp

Lines changed: 40 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2048,10 +2048,10 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
20482048

20492049
//need extra move for dst
20502050
if (!IS_DTYPE(origDst->getType()) || origDst->getHorzStride() != 1 ||
2051-
!builder.isOpndAligned(origDst, 32))
2051+
!builder.isOpndAligned(origDst, getGRFSize()))
20522052
{
20532053
// macl dst must be grf-aligned, packed D/UD as it is also used for the implicit acc source's region
2054-
G4_DstRegRegion* tmpDst = insertMovAfter(it, origDst, tmpType, bb);
2054+
G4_DstRegRegion* tmpDst = insertMovAfter(it, origDst, tmpType, bb, GRFALIGN);
20552055
mulInst->setDest(tmpDst);
20562056
}
20572057
}
@@ -2090,10 +2090,10 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
20902090
machIter = bb->insertBefore(++machIter, maclInst);
20912091

20922092
if (!IS_DTYPE(origDst->getType()) || origDst->getHorzStride() != 1 ||
2093-
!builder.isOpndAligned(origDst, 32))
2093+
!builder.isOpndAligned(origDst, getGRFSize()))
20942094
{
20952095
// macl dst must be grf-aligned, packed D/UD as it is also used for the implicit acc source's region
2096-
G4_DstRegRegion* tmpDst = insertMovAfter(machIter, origDst, tmpType, bb);
2096+
G4_DstRegRegion* tmpDst = insertMovAfter(machIter, origDst, tmpType, bb, GRFALIGN);
20972097
maclInst->setDest(tmpDst);
20982098
}
20992099
}
@@ -2460,10 +2460,9 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
24602460
// Translate MULH into
24612461
// MUL acc src0 src1
24622462
// MACH dst src0 src1
2463-
bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
2463+
void HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
24642464
{
24652465
G4_INST* inst = *i;
2466-
INST_LIST_ITER iter = i;
24672466
G4_ExecSize execSize = inst->getExecSize();
24682467

24692468
int inst_opt = inst->getOption();
@@ -2530,23 +2529,20 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25302529
execSize > 1 ? builder.getRegionStride2() : builder.getRegionScalar(),
25312530
dst->getType());
25322531

2533-
++iter;
2534-
25352532
G4_INST* tmpMov = builder.createMov(execSize, dst, tmpSrc, inst->getOption(), false);
25362533
tmpMov->setPredicate(builder.duplicateOperand(inst->getPredicate()));
25372534

2538-
bb->insertBefore(iter, tmpMov);
2539-
//it will decrement back to mov
2540-
i = iter;
2535+
bb->insertAfter(i, tmpMov);
25412536

2542-
/*
2543-
Need to remove dst from uses list of mulh, and add them to movInst useList
2544-
add movInst to uselist of mulh.
2545-
Add mulh to def instruction list of movInst
2546-
*/
2537+
// Check the newly inserted mov inst
2538+
i++;
2539+
2540+
// Need to remove dst from uses list of mulh, and add them to movInst useList
2541+
// add movInst to uselist of mulh.
2542+
// Add mulh to def instruction list of movInst
25472543
inst->transferUse(tmpMov);
25482544
inst->addDefUse(tmpMov, Opnd_src0);
2549-
return true;
2545+
return;
25502546
}
25512547

25522548
// src1 does not support modifier
@@ -2575,8 +2571,6 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25752571
// Here just create tmp variables to fix srcMod, cond modifier, saturate, etc. And Mul->Mul + Macl expanding will
25762572
// be done in expandMulPostSchedule pass.
25772573

2578-
bool newInstInserted = false;
2579-
25802574
// sat cannot be used at all in the macro sequence
25812575
// this effectively means sat is broken for mul D D D
25822576
inst->setSaturate(g4::NOSAT);
@@ -2595,32 +2589,30 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
25952589
}
25962590

25972591
INST_LIST_ITER end_iter = i;
2598-
// check if the ACC source is aligned to mach dst
2599-
// ToDo: this should be checked by fixAcc?
2592+
// this mul will be expanded into mul+macl in expandMulPostSchedule pass. Since expanded macl
2593+
// must be grf-aligned, the mul needs to be made grf-aligned as well.
26002594
G4_DstRegRegion* dst = inst->getDst();
26012595
if (inst->getSaturate() ||
26022596
dst->getExecTypeSize() > TypeSize(Type_D) ||
2603-
isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst))
2597+
isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst) ||
2598+
!builder.isOpndAligned(dst, getGRFSize()))
26042599
{
26052600
// add a tmp mov
2606-
inst->setDest(insertMovAfter(i, dst, dst->getType(), bb));
2601+
inst->setDest(insertMovAfter(i, dst, dst->getType(), bb, GRFALIGN));
26072602
end_iter++;
2608-
newInstInserted = true;
26092603
}
26102604

26112605
if (execSize > builder.getNativeExecSize())
26122606
{
26132607
auto start_iter = i;
2614-
splitDWMULInst(i, end_iter, bb);
2615-
newInstInserted = true;
2608+
splitDWMULInst(start_iter, end_iter, bb);
2609+
// start_iter points to the first half of mulh. Need to double-check this newly inserted mulh to see if it needs to be split again
2610+
i = start_iter;
26162611
}
2617-
2618-
if (newInstInserted)
2612+
else
26192613
{
2620-
// it will decrease back to mulh
26212614
i++;
26222615
}
2623-
return newInstInserted;
26242616
}
26252617
else
26262618
{
@@ -2634,7 +2626,7 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
26342626
G4_INST* newMul = builder.createBinOp(G4_mul, execSize,
26352627
acc_dst_opnd, builder.duplicateOperand(src0), builder.duplicateOperand(src1), inst_opt, false);
26362628

2637-
bb->insertBefore(iter, newMul);
2629+
bb->insertBefore(i, newMul);
26382630
inst->copyDefsTo(newMul, false);
26392631

26402632
fixMulSrc1(std::prev(i), bb);
@@ -2681,10 +2673,16 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
26812673
{
26822674
auto start_iter = std::prev(i);
26832675
splitDWMULInst(start_iter, end_iter, bb);
2684-
i = end_iter;
2676+
// start_iter points to the first half of mul. Need to check the newly inserted mul/mach instructions
2677+
i = start_iter;
2678+
}
2679+
else
2680+
{
2681+
// i points to mach, and need to check the new inserted mul before mach
2682+
i = std::prev(i);
26852683
}
2686-
return true;
26872684
}
2685+
return;
26882686
}
26892687

26902688
//
@@ -3569,6 +3567,11 @@ void HWConformity::splitDWMULInst(INST_LIST_ITER& start, INST_LIST_ITER& end, G4
35693567
evenlySplitInst(iter, bb);
35703568
G4_INST* expand_sec_half_op = *iter;
35713569
bb->insertBefore(last_iter, expand_sec_half_op);
3570+
// Handle the case where only one instruction needs to be split, i.e. start equals end
3571+
if (start == end)
3572+
{
3573+
start--;
3574+
}
35723575
end--;
35733576
bb->erase(iter);
35743577
}
@@ -5271,14 +5274,9 @@ void HWConformity::conformBB(G4_BB* bb)
52715274

52725275
if (inst->opcode() == G4_mulh)
52735276
{
5274-
if (fixMULHInst(i, bb))
5275-
{
5276-
// inserted mul before
5277-
// check the newly added MUL inst
5278-
i--;
5279-
next_iter = i;
5280-
continue;
5281-
}
5277+
fixMULHInst(i, bb);
5278+
next_iter = i;
5279+
continue;
52825280
}
52835281

52845282
#ifdef _DEBUG
@@ -7172,4 +7170,4 @@ void HWConformity::fixSrc1Region(INST_LIST_ITER it, G4_BB* bb)
71727170
G4_Operand* new_src1 = insertMovBefore(it, 1, src1->getType(), bb);
71737171
inst->setSrc(new_src1, 1);
71747172
}
7175-
}
7173+
}

visa/HWConformity.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ namespace vISA
109109
void fixPackedSource(INST_LIST_ITER it, G4_BB *bb, G4_Type extype);
110110
bool fixMathInst(INST_LIST_ITER it, G4_BB *bb);
111111
bool fixMULInst(INST_LIST_ITER &it, G4_BB *bb);
112-
bool fixMULHInst(INST_LIST_ITER &i, G4_BB *bb);
112+
void fixMULHInst(INST_LIST_ITER &i, G4_BB *bb);
113113
void fixMulSrc1(INST_LIST_ITER i, G4_BB* bb);
114114
void splitDWMULInst(INST_LIST_ITER &start, INST_LIST_ITER &end, G4_BB *bb);
115115
void fixOpnds(INST_LIST_ITER it, G4_BB *bb, G4_Type& exType);

visa/Optimizer.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11066,6 +11066,7 @@ void Optimizer::expandMulPostSchedule()
1106611066
}
1106711067
else
1106811068
{
11069+
// create a mach inst
1106911070
inst->setOpcode(G4_mul);
1107011071
maclOrMachInst = builder.createMach(inst->getExecSize(),
1107111072
dst, builder.duplicateOperand(src0), builder.duplicateOperand(src1), origOptions, accType);
@@ -11080,9 +11081,9 @@ void Optimizer::expandMulPostSchedule()
1108011081
auto maclOrMachInstIt = bb->insertAfter(it, maclOrMachInst);
1108111082

1108211083
// Always add a dummy mov after mach/macl for HW read suppression W/A
11083-
auto dummyMovSrc = builder.createSrc(dst->getTopDcl()->getRegVar(),
11084+
auto dummyMovSrc = builder.createSrc(dst->getBase(),
1108411085
0, 0, builder.getRegionScalar(), Type_D);
11085-
G4_INST* dummyMov = builder.createMov(g4::SIMD16, builder.createNullDst(Type_D),
11086+
G4_INST* dummyMov = builder.createMov(g4::SIMD1, builder.createNullDst(Type_D),
1108611087
dummyMovSrc, InstOpt_WriteEnable, false);
1108711088
bb->insertAfter(maclOrMachInstIt, dummyMov);
1108811089
}

visa/include/VISAOptions.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ DEF_VISA_OPTION(vISA_DstSrcOverlapWA, ET_BOOL, "-dstSrcOverlapWA"
250250
DEF_VISA_OPTION(vISA_noSendSrcDstOverlap, ET_BOOL, "-noSendSrcDstOverlap", UNUSED, false)
251251
DEF_VISA_OPTION(vISA_cloneSampleInst, ET_BOOL, "-cloneSampleInst", UNUSED, false)
252252
DEF_VISA_OPTION(vISA_cloneEvaluateSampleInst, ET_BOOL, "-cloneEvaluateSampleInst", UNUSED, false)
253-
DEF_VISA_OPTION(vISA_expandMulPostSchedule, ET_BOOL, "-expandMulPostSchedule", UNUSED, false)
253+
DEF_VISA_OPTION(vISA_expandMulPostSchedule, ET_BOOL, "-expandMulPostSchedule", UNUSED, true)
254254

255255
//=== HW debugging options ===
256256
DEF_VISA_OPTION(vISA_GenerateDebugInfo, ET_BOOL, "-generateDebugInfo", UNUSED, false)

0 commit comments

Comments
 (0)