Skip to content

Commit bdd64d1

Browse files
pszymich authored and Zuul committed
Synchronization change between internal branches.
Change-Id: I7acf64fe55a03c5d26300383168774de9c66f66e
1 parent 8f8b67d commit bdd64d1

File tree

4 files changed

+45
-36
lines changed

4 files changed

+45
-36
lines changed

visa/FlowGraph.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2968,14 +2968,14 @@ void FlowGraph::processGoto(bool HasSIMDCF)
29682968
if (activeJoinBlocks.size() > 1 ||
29692969
(activeJoinBlocks.size() == 1 && activeJoinBlocks.back().IsNestedJoin))
29702970
{
2971-
bb->setInNestedDivergentBranch(true);
2971+
setInNestedDivergentBranch(bb);
29722972
}
29732973
}
29742974
else if ((builder->getuint32Option(vISA_noMaskToAnyhWA) & 0x3) > 0)
29752975
{
29762976
if (activeJoinBlocks.size() > 0)
29772977
{
2978-
bb->setInNestedDivergentBranch(true);
2978+
setInNestedDivergentBranch(bb);
29792979
}
29802980
}
29812981

visa/FlowGraph.h

Lines changed: 25 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -284,21 +284,6 @@ class G4_BB
284284
// if the block is under simd flow control
285285
bool inSimdFlow;
286286

287-
// [HW WA]
288-
// If the block does not post-dominate the entry, it is considered
289-
// as divergent BB. Within a divergent BB, a further divergence is
290-
// considered as nested divergent.
291-
//
292-
// [Formal def] define root_1_divergentBB (level 1 divergent root)
293-
// to be BBs that
294-
// 1. not pdom(BB, entry); and
295-
// 2. There exists P, so that idom(P, BB) && pdom(P, entry)
296-
// A BB is in a nested divergent branch if there is a root_1_divergentBB,
297-
// say B1, such that dom(B1, BB) && not pdom(BB, B1).
298-
//
299-
// This is set in processGoto().
300-
bool inNestedDivergentBranch;
301-
302287
// the physical pred/succ for this block (i.e., the pred/succ for this block in the BB list)
303288
// Note that some transformations may rearrange BB layout, so for safety it's best to recompute
304289
// this
@@ -384,8 +369,7 @@ class G4_BB
384369
traversal(0), idom(NULL), beforeCall(NULL),
385370
afterCall(NULL), calleeInfo(NULL), BBType(G4_BB_NONE_TYPE),
386371
inNaturalLoop(false), hasSendInBB(false), loopNestLevel(0), scopeID(0),
387-
inSimdFlow(false), inNestedDivergentBranch(false),
388-
physicalPred(NULL), physicalSucc(NULL), parent(fg),
372+
inSimdFlow(false), physicalPred(NULL), physicalSucc(NULL), parent(fg),
389373
instList(alloc)
390374
{
391375
}
@@ -430,10 +414,6 @@ class G4_BB
430414

431415
void setSendInBB(bool val) { hasSendInBB = val; }
432416
bool isSendInBB() { return hasSendInBB; }
433-
434-
void setInNestedDivergentBranch(bool val) { inNestedDivergentBranch = val; }
435-
bool isInNestedDivergentBranch() const { return inNestedDivergentBranch; }
436-
437417
void setNestLevel() {loopNestLevel ++;}
438418
unsigned char getNestLevel() {return loopNestLevel;}
439419
void resetNestLevel() { loopNestLevel = 0; }
@@ -706,6 +686,21 @@ class FlowGraph
706686
// ToDo: We should use FuncInfo instead, but at the time it was needed FuncInfo was not constructed yet..
707687
std::unordered_map<G4_Label*, std::vector<G4_BB*>> subroutines;
708688

689+
// [HW WA]
690+
// If the block does not post-dominate the entry, it is considered
691+
// as divergent BB. Within a divergent BB, a further divergence is
692+
// considered as nested divergent.
693+
//
694+
// [Formal def] define root_1_divergentBB (level 1 divergent root)
695+
// to be BBs that
696+
// 1. not pdom(BB, entry); and
697+
// 2. There exists P, so that idom(P, BB) && pdom(P, entry)
698+
// A BB is in a nested divergent branch if there is a root_1_divergentBB,
699+
// say B1, such that dom(B1, BB) && not pdom(BB, B1).
700+
//
701+
// This is set in processGoto().
702+
std::unordered_map<G4_BB*, int> nestedDivergentBBs;
703+
709704
public:
710705
typedef std::pair<G4_BB*, G4_BB*> Edge;
711706
typedef std::set<G4_BB*> Blocks;
@@ -922,6 +917,15 @@ class FlowGraph
922917
return false;
923918
}
924919

920+
// Records B as being in a nested divergent branch by inserting it as a key
// of nestedDivergentBBs. The mapped value is always 1; only key presence is
// consulted by isInNestedDivergentBranch(). There is no way to clear the mark.
void setInNestedDivergentBranch(G4_BB* B)
921+
{
922+
nestedDivergentBBs[B] = 1;
923+
}
924+
// Returns true if B is a key of nestedDivergentBBs, false otherwise.
// Uses count() rather than operator[] so that a query never inserts B
// into the map (and so the method can be const).
bool isInNestedDivergentBranch(G4_BB* B) const
925+
{
926+
return nestedDivergentBBs.count(B) > 0;
927+
}
928+
925929
//
926930
// Merge multiple returns into one, prepare for spill code insertion
927931
//

visa/Gen4_IR.cpp

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3312,16 +3312,22 @@ G4_INST::isComprInvariantSrcRegion(G4_SrcRegRegion* src, int srcPos)
33123312
// Returns true when op is G4_smov, or when a predicate is still considered
// in effect (aPred non-null) and op is not G4_sel.
//
// On the post-change ('+') lines the predicate is dropped (treated as absent)
// only if all of the following hold:
//   - the target is not VISA_CM,
//   - vISA_noMaskToAnyhWA has at least one of its low two bits set,
//   - the platform is GENX_TGLLP or vISA_forceNoMaskToAnyhWA is set,
//   - the predicate control is an "anyh" control whose group size is at
//     least the kernel SIMD size.
// The pre-change ('-') lines performed the anyh/group-size check whenever the
// target was not VISA_CM, without consulting the WA options or the platform.
bool G4_INST::isPartialWrite() const
33133313
{
33143314
G4_Predicate* aPred = predicate;
3315-
if (aPred && G4_Predicate::isAnyH(aPred->getControl()) &&
3316-
builder.kernel.getOptions()->getTarget() != VISA_CM)
3315+
if (builder.kernel.getOptions()->getTarget() != VISA_CM &&
3316+
(builder.getuint32Option(vISA_noMaskToAnyhWA) & 0x3) > 0 &&
3317+
(getGenxPlatform() == GENX_TGLLP ||
3318+
builder.getOption(vISA_forceNoMaskToAnyhWA)))
33173319
{
33183320
// Only for code from IGC. HW WA related.
3319-
if (aPred->getPredCtrlGroupSize() >= builder.kernel.getSimdSize())
3321+
if (aPred && G4_Predicate::isAnyH(aPred->getControl()))
33203322
{
3321-
// This is equivalent to NoMask
3322-
aPred = nullptr;
3323+
if (aPred->getPredCtrlGroupSize() >= builder.kernel.getSimdSize())
3324+
{
3325+
// equivalent to NoMask without predicate [ie (w)]
3326+
aPred = nullptr;
3327+
}
33233328
}
33243329
}
3330+

33253331
return (aPred != NULL && op != G4_sel) || op == G4_smov;
33263332
}
33273333

visa/Optimizer.cpp

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -11747,7 +11747,7 @@ void Optimizer::replaceNoMaskWithAnyhWA()
1174711747
for (auto BI : fg)
1174811748
{
1174911749
G4_BB* BB = BI;
11750-
if (!BB->isInNestedDivergentBranch()) {
11750+
// Skip BBs that are NOT in a nested divergent branch. This keeps the same
// sense as the pre-refactor check `!BB->isInNestedDivergentBranch()`;
// dropping the negation would skip exactly the BBs the old code processed.
if (!fg.isInNestedDivergentBranch(BB)) {
1175111751
continue;
1175211752
}
1175311753

@@ -11808,7 +11808,7 @@ void Optimizer::replaceNoMaskWithAnyhWA()
1180811808
// (2) (W) mov (1|M0) f1.0<1>:ud ce0.0<0;1,0>:ud
1180911809
// (f1.0.any16h) inst1
1181011810
// The algo tries to use (1) if doing so has lower chance to increase flag
11811-
// register pressure; otherwise, it uses (2).
11811+
// register pressure and if f0.0 is local; otherwise, it uses (2).
1181211812

1181311813
// 1. Check if an existing flag can be used
1181411814
// If BB's predecessor ends with a conditional goto instruction,
@@ -11822,7 +11822,9 @@ void Optimizer::replaceNoMaskWithAnyhWA()
1182211822
assert(gotoInst->opcode() == G4_goto && "Last inst should be goto!");
1182311823
G4_Predicate* pred = gotoInst->getPredicate();
1182411824
if (predBB->Succs.size() == 2 &&
11825-
pred && pred->getControl() == PRED_DEFAULT) {
11825+
pred && pred->getControl() == PRED_DEFAULT &&
11826+
!fg.globalOpndHT.isOpndGlobal(pred))
11827+
{
1182611828
prevPred = pred;
1182711829
predTargetBB = predBB->Succs.back();
1182811830
}
@@ -11868,12 +11870,9 @@ void Optimizer::replaceNoMaskWithAnyhWA()
1186811870
assert(predTargetBB && "predTargetBB should not be null here");
1186911871

1187011872
// First, make Pred global
11871-
if (!fg.globalOpndHT.isOpndGlobal(prevPred))
11872-
{
11873-
fg.globalOpndHT.addGlobalOpnd(prevPred);
11874-
}
11875-
flagVarForBB = prevPred->getBase()->asRegVar();
11873+
fg.globalOpndHT.addGlobalOpnd(prevPred);
1187611874

11875+
flagVarForBB = prevPred->getBase()->asRegVar();
1187711876
if ((predTargetBB == BB && prevPred->getState() == PredState_Minus) ||
1187811877
(predTargetBB != BB && prevPred->getState() == PredState_Plus))
1187911878
{

0 commit comments

Comments
 (0)