@@ -2048,10 +2048,10 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
2048
2048
2049
2049
// need extra move for dst
2050
2050
if (!IS_DTYPE (origDst->getType ()) || origDst->getHorzStride () != 1 ||
2051
- !builder.isOpndAligned (origDst, 32 ))
2051
+ !builder.isOpndAligned (origDst, getGRFSize () ))
2052
2052
{
2053
2053
// macl dst must be grf-aligned, packed D/UD as it is also used for the implicit acc source's region
2054
- G4_DstRegRegion* tmpDst = insertMovAfter (it, origDst, tmpType, bb);
2054
+ G4_DstRegRegion* tmpDst = insertMovAfter (it, origDst, tmpType, bb, GRFALIGN );
2055
2055
mulInst->setDest (tmpDst);
2056
2056
}
2057
2057
}
@@ -2090,10 +2090,10 @@ void HWConformity::doGenerateMacl(INST_LIST_ITER it, G4_BB* bb)
2090
2090
machIter = bb->insertBefore (++machIter, maclInst);
2091
2091
2092
2092
if (!IS_DTYPE (origDst->getType ()) || origDst->getHorzStride () != 1 ||
2093
- !builder.isOpndAligned (origDst, 32 ))
2093
+ !builder.isOpndAligned (origDst, getGRFSize () ))
2094
2094
{
2095
2095
// macl dst must be grf-aligned, packed D/UD as it is also used for the implicit acc source's region
2096
- G4_DstRegRegion* tmpDst = insertMovAfter (machIter, origDst, tmpType, bb);
2096
+ G4_DstRegRegion* tmpDst = insertMovAfter (machIter, origDst, tmpType, bb, GRFALIGN );
2097
2097
maclInst->setDest (tmpDst);
2098
2098
}
2099
2099
}
@@ -2460,10 +2460,9 @@ bool HWConformity::fixMULInst(INST_LIST_ITER& i, G4_BB* bb)
2460
2460
// Translate MULH into
2461
2461
// MUL acc src0 src1
2462
2462
// MACH dst src0 src1
2463
- bool HWConformity::fixMULHInst (INST_LIST_ITER& i, G4_BB* bb)
2463
+ void HWConformity::fixMULHInst (INST_LIST_ITER& i, G4_BB* bb)
2464
2464
{
2465
2465
G4_INST* inst = *i;
2466
- INST_LIST_ITER iter = i;
2467
2466
G4_ExecSize execSize = inst->getExecSize ();
2468
2467
2469
2468
int inst_opt = inst->getOption ();
@@ -2530,23 +2529,20 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
2530
2529
execSize > 1 ? builder.getRegionStride2 () : builder.getRegionScalar (),
2531
2530
dst->getType ());
2532
2531
2533
- ++iter;
2534
-
2535
2532
G4_INST* tmpMov = builder.createMov (execSize, dst, tmpSrc, inst->getOption (), false );
2536
2533
tmpMov->setPredicate (builder.duplicateOperand (inst->getPredicate ()));
2537
2534
2538
- bb->insertBefore (iter, tmpMov);
2539
- // it will decrement back to mov
2540
- i = iter;
2535
+ bb->insertAfter (i, tmpMov);
2541
2536
2542
- /*
2543
- Need to remove dst from uses list of mulh, and add them to movInst useList
2544
- add movInst to uselist of mulh.
2545
- Add mulh to def instruction list of movInst
2546
- */
2537
+ // Check the new inserted mov inst
2538
+ i++;
2539
+
2540
+ // Need to remove dst from uses list of mulh, and add them to movInst useList
2541
+ // add movInst to uselist of mulh.
2542
+ // Add mulh to def instruction list of movInst
2547
2543
inst->transferUse (tmpMov);
2548
2544
inst->addDefUse (tmpMov, Opnd_src0);
2549
- return true ;
2545
+ return ;
2550
2546
}
2551
2547
2552
2548
// src1 does not support modifier
@@ -2575,8 +2571,6 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
2575
2571
// Here just create tmp variables to fix srcMod, cond modifier, saturate, etc. And Mul->Mul + Macl expanding will
2576
2572
// be done in expandMulPostSchedule pass.
2577
2573
2578
- bool newInstInserted = false ;
2579
-
2580
2574
// sat cannot be used at all in the macro sequence
2581
2575
// this effectively means sat is broken for mul D D D
2582
2576
inst->setSaturate (g4::NOSAT);
@@ -2595,32 +2589,30 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
2595
2589
}
2596
2590
2597
2591
INST_LIST_ITER end_iter = i;
2598
- // check if the ACC source is aligned to mach dst
2599
- // ToDo: this should be checked by fixAcc?
2592
+ // this mul will be expanded into mul+macl in expandMulPostSchedule pass. Since expanded macl
2593
+ // must be grf-aligned, the mul must be made grf-aligned as well.
2600
2594
G4_DstRegRegion* dst = inst->getDst ();
2601
2595
if (inst->getSaturate () ||
2602
2596
dst->getExecTypeSize () > TypeSize (Type_D) ||
2603
- isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst))
2597
+ isPreAssignedRegOffsetNonZero<G4_DstRegRegion>(dst) ||
2598
+ !builder.isOpndAligned (dst, getGRFSize ()))
2604
2599
{
2605
2600
// add a tmp mov
2606
- inst->setDest (insertMovAfter (i, dst, dst->getType (), bb));
2601
+ inst->setDest (insertMovAfter (i, dst, dst->getType (), bb, GRFALIGN ));
2607
2602
end_iter++;
2608
- newInstInserted = true ;
2609
2603
}
2610
2604
2611
2605
if (execSize > builder.getNativeExecSize ())
2612
2606
{
2613
2607
auto start_iter = i;
2614
- splitDWMULInst (i, end_iter, bb);
2615
- newInstInserted = true ;
2608
+ splitDWMULInst (start_iter, end_iter, bb);
2609
+ // start_iter points to the first half of mulh. Need to double-check this newly inserted mulh to see whether it needs to be split again
2610
+ i = start_iter;
2616
2611
}
2617
-
2618
- if (newInstInserted)
2612
+ else
2619
2613
{
2620
- // it will decrease back to mulh
2621
2614
i++;
2622
2615
}
2623
- return newInstInserted;
2624
2616
}
2625
2617
else
2626
2618
{
@@ -2634,7 +2626,7 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
2634
2626
G4_INST* newMul = builder.createBinOp (G4_mul, execSize,
2635
2627
acc_dst_opnd, builder.duplicateOperand (src0), builder.duplicateOperand (src1), inst_opt, false );
2636
2628
2637
- bb->insertBefore (iter , newMul);
2629
+ bb->insertBefore (i , newMul);
2638
2630
inst->copyDefsTo (newMul, false );
2639
2631
2640
2632
fixMulSrc1 (std::prev (i), bb);
@@ -2681,10 +2673,16 @@ bool HWConformity::fixMULHInst(INST_LIST_ITER& i, G4_BB* bb)
2681
2673
{
2682
2674
auto start_iter = std::prev (i);
2683
2675
splitDWMULInst (start_iter, end_iter, bb);
2684
- i = end_iter;
2676
+ // start_iter points to the first half of mul. Need to check the newly inserted mul/mach instructions
2677
+ i = start_iter;
2678
+ }
2679
+ else
2680
+ {
2681
+ // i points to mach; step back so that the newly inserted mul before mach is checked as well
2682
+ i = std::prev (i);
2685
2683
}
2686
- return true ;
2687
2684
}
2685
+ return ;
2688
2686
}
2689
2687
2690
2688
//
@@ -3569,6 +3567,11 @@ void HWConformity::splitDWMULInst(INST_LIST_ITER& start, INST_LIST_ITER& end, G4
3569
3567
evenlySplitInst (iter, bb);
3570
3568
G4_INST* expand_sec_half_op = *iter;
3571
3569
bb->insertBefore (last_iter, expand_sec_half_op);
3570
+ // Handle the case where only one instruction needs to be split, i.e. start equals end
3571
+ if (start == end)
3572
+ {
3573
+ start--;
3574
+ }
3572
3575
end--;
3573
3576
bb->erase (iter);
3574
3577
}
@@ -5271,14 +5274,9 @@ void HWConformity::conformBB(G4_BB* bb)
5271
5274
5272
5275
if (inst->opcode () == G4_mulh)
5273
5276
{
5274
- if (fixMULHInst (i, bb))
5275
- {
5276
- // inserted mul before
5277
- // check the newly added MUL inst
5278
- i--;
5279
- next_iter = i;
5280
- continue ;
5281
- }
5277
+ fixMULHInst (i, bb);
5278
+ next_iter = i;
5279
+ continue ;
5282
5280
}
5283
5281
5284
5282
#ifdef _DEBUG
@@ -7172,4 +7170,4 @@ void HWConformity::fixSrc1Region(INST_LIST_ITER it, G4_BB* bb)
7172
7170
G4_Operand* new_src1 = insertMovBefore (it, 1 , src1->getType (), bb);
7173
7171
inst->setSrc (new_src1, 1 );
7174
7172
}
7175
- }
7173
+ }
0 commit comments