Skip to content

Commit 396f122

Browse files
jgu222sys_zuul
authored andcommitted
TGL has a control-flow bug that causes NoMask instructions to be executed
even when no channels are on, i.e. when they should not be executed at all. This commit works around the bug by replacing NoMask with a predicate that uses an "any" PredCtrl, such as any16h. Change-Id: If43cce7ca0ef0ed533c80c6925b9305be7e6d029
1 parent 43dc6e1 commit 396f122

File tree

10 files changed

+400
-25
lines changed

10 files changed

+400
-25
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3984,6 +3984,11 @@ namespace IGC
39843984
SaveOption(vISA_EnableScalarJmp, false);
39853985
}
39863986

3987+
if (IGC_IS_FLAG_ENABLED(ForceNoMaskToAnyhWA)) {
3988+
SaveOption(vISA_forceNoMaskToAnyhWA, true);
3989+
}
3990+
SaveOption(vISA_noMaskToAnyhWA, IGC_GET_FLAG_VALUE(NoMaskToAnyhWA));
3991+
39873992
if (IGC_IS_FLAG_ENABLED(DisableCSEL))
39883993
{
39893994
SaveOption(vISA_enableCSEL, false);

IGC/common/igc_flags.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ DECLARE_IGC_REGKEY(bool, EnableVISADebug, false, "Runs VISA in deb
4747
DECLARE_IGC_REGKEY(DWORD, EnableVISAStructurizer, 1, "Enable/Disable VISA structurizer. See value defs in igc_flags.hpp.", false)
4848
DECLARE_IGC_REGKEY(bool, EnableVISAJmpi, true, "Enable/Disable VISA generating jmpi (scalar jump).", false)
4949
DECLARE_IGC_REGKEY(bool, EnableSCFWA, true, "Enable/Disable VISA structrizer WA (temporary for debugging)", false)
50+
DECLARE_IGC_REGKEY(DWORD, NoMaskToAnyhWA, 0, "Control replacing NoMask with anyh predicate. Valid value: (two groups) \
51+
valid value: bit[2] : how to insert anyh - 0 (simple) | 1 (optim|3. \
52+
bit[1:0] : which BB to insert - 0(off), 1(divergent BB), \
53+
2 (nested divergent BB", false)
54+
DECLARE_IGC_REGKEY(bool, ForceNoMaskToAnyhWA, false, "Force replacing NoMask with any predicate", false)
5055
DECLARE_IGC_REGKEY(DWORD,UnifiedSendCycle, 0, "Using unified send cycle.", false)
5156
DECLARE_IGC_REGKEY(DWORD,DisableMixMode, 0, "Disables mix mode in vISA BE.", false)
5257
DECLARE_IGC_REGKEY(DWORD,DisableHFMath, 0, "Disables HF math instructions.", false)

visa/BuildIR.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,7 @@ class IR_Builder {
539539

540540
const Options* getOptions() const { return m_options; }
541541
bool getOption(vISAOptions opt) const {return m_options->getOption(opt); }
542+
// Returns the uint32_t value of vISA option `opt`, forwarded from m_options->getuInt32Option().
uint32_t getuint32Option(vISAOptions opt) const { return m_options->getuInt32Option(opt); }
542543
void getOption(vISAOptions opt, const char *&str) const {return m_options->getOption(opt, str); }
543544
void addInputArg(input_info_t * inpt);
544545
input_info_t * getInputArg(unsigned int index);

visa/FlowGraph.cpp

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2694,34 +2694,44 @@ void FlowGraph::insertJoinToBB(G4_BB* bb, uint8_t execSize, G4_Label* jip)
26942694
}
26952695
}
26962696

2697-
typedef std::pair<G4_BB*, int> BlockSizePair;
2697+
// One entry of the active-join list used by processGoto(): a block that needs
// a join inserted, the exec size recorded for that join, and whether the join
// is considered nested (see IsNestedJoin).
struct SJoinInfo {
2698+
// B: block that needs the join; E: exec size for the join;
// Nested: initial value of IsNestedJoin (defaults to false).
SJoinInfo(G4_BB* B, uint16_t E, bool Nested = false) :
2699+
BB(B), ExecSize(E), IsNestedJoin(Nested) {}
2700+
// Block where the join is to be inserted.
G4_BB* BB;
2701+
// Exec size recorded for the join; addBBToActiveJoinList() raises it when a
// later request for the same block carries a larger exec size.
uint16_t ExecSize;
2702+
bool IsNestedJoin; // [HW WA] : join for a goto within a divergent BB.
2703+
};
26982704

2699-
static void addBBToActiveJoinList(std::list<BlockSizePair>& activeJoinBlocks, G4_BB* bb, int execSize)
2705+
// Records `bb` in `activeJoinBlocks` as a block that needs a join.
//
// The list is walked from the front:
//   - if an entry with the same BB id exists, its exec size is raised to
//     `execSize` when that is larger, and the entry is marked as a nested join;
//   - otherwise `bb` is inserted in front of the first entry whose BB id is
//     greater than bb's id (marked as nested, since the list is non-empty);
//   - if no such entry exists, `bb` is appended, and it is marked as nested
//     only when the list already held other entries.
//
// NOTE(review): `execSize` is an int while SJoinInfo::ExecSize is uint16_t, so
// the value is narrowed when stored -- confirm callers never pass a value
// outside the uint16_t range.
static void addBBToActiveJoinList(std::list<SJoinInfo>& activeJoinBlocks, G4_BB* bb, int execSize)
27002706
{
27012707
// add goto target to list of active blocks that need a join
2702-
std::list<BlockSizePair>::iterator listIter;
2708+
std::list<SJoinInfo>::iterator listIter;
27032709
for (listIter = activeJoinBlocks.begin(); listIter != activeJoinBlocks.end(); ++listIter)
27042710
{
2705-
G4_BB* aBB = (*listIter).first;
2711+
// If activeJoinBlocks isn't empty, this join should be considered as a nested join
2712+
SJoinInfo& jinfo = (*listIter);
2713+
G4_BB* aBB = jinfo.BB;
27062714
if (aBB->getId() == bb->getId())
27072715
{
27082716
// block already in list, update exec size if necessary
2709-
if (execSize > (*listIter).second)
2717+
if (execSize > jinfo.ExecSize)
27102718
{
2711-
(*listIter).second = execSize;
2719+
jinfo.ExecSize = execSize;
27122720
}
2721+
jinfo.IsNestedJoin = true;
27132722
break;
27142723
}
27152724
else if (aBB->getId() > bb->getId())
27162725
{
// first entry with a larger BB id: insert in front of it
2717-
activeJoinBlocks.insert(listIter, BlockSizePair(bb, execSize));
2726+
activeJoinBlocks.insert(listIter, SJoinInfo(bb, execSize, true));
27182727
break;
27192728
}
27202729
}
27212730

27222731
// loop ran to the end without a match or an insertion point: append
if (listIter == activeJoinBlocks.end())
27232732
{
2724-
activeJoinBlocks.push_back(BlockSizePair(bb, execSize));
2733+
bool nested = activeJoinBlocks.empty() ? false : true;
2734+
activeJoinBlocks.push_back(SJoinInfo(bb, execSize, nested));
27252735
}
27262736
}
27272737

@@ -2850,7 +2860,7 @@ void FlowGraph::setJIPForEndif(G4_INST* endif, G4_INST* target, G4_BB* targetBB)
28502860
void FlowGraph::processGoto(bool HasSIMDCF)
28512861
{
28522862
// list of active blocks where a join needs to be inserted, sorted in lexical order
2853-
std::list<BlockSizePair> activeJoinBlocks;
2863+
std::list<SJoinInfo> activeJoinBlocks;
28542864
bool doScalarJmp = !builder->noScalarJmp();
28552865

28562866
for (BB_LIST_ITER it = BBs.begin(), itEnd = BBs.end(); it != itEnd; ++it)
@@ -2863,18 +2873,18 @@ void FlowGraph::processGoto(bool HasSIMDCF)
28632873

28642874
if (activeJoinBlocks.size() > 0)
28652875
{
2866-
if (bb == activeJoinBlocks.front().first)
2876+
if (bb == activeJoinBlocks.front().BB)
28672877
{
28682878
// This block is the target of one or more forward goto,
28692879
// or the fall-thru of a backward goto, needs to insert a join
2870-
int execSize = activeJoinBlocks.front().second;
2880+
int execSize = activeJoinBlocks.front().ExecSize;
28712881
G4_Label* joinJIP = NULL;
28722882

28732883
activeJoinBlocks.pop_front();
28742884
if (activeJoinBlocks.size() > 0)
28752885
{
28762886
//set join JIP to the next active join
2877-
G4_BB* joinBlock = activeJoinBlocks.front().first;
2887+
G4_BB* joinBlock = activeJoinBlocks.front().BB;
28782888
joinJIP = joinBlock->getLabel();
28792889
}
28802890

@@ -2950,6 +2960,25 @@ void FlowGraph::processGoto(bool HasSIMDCF)
29502960
bb->setInSimdFlow(true);
29512961
}
29522962

2963+
// [HW WA] set nested divergent branch.
2964+
// 1) [conservative] set it if it is divergent, but not necessarily nested, or
2965+
// 2) Set it if there are at least two active joins or one nested join.
2966+
if ((builder->getuint32Option(vISA_noMaskToAnyhWA) & 0x3) > 1)
2967+
{
2968+
if (activeJoinBlocks.size() > 1 ||
2969+
(activeJoinBlocks.size() == 1 && activeJoinBlocks.back().IsNestedJoin))
2970+
{
2971+
bb->setInNestedDivergentBranch(true);
2972+
}
2973+
}
2974+
else if ((builder->getuint32Option(vISA_noMaskToAnyhWA) & 0x3) > 0)
2975+
{
2976+
if (activeJoinBlocks.size() > 0)
2977+
{
2978+
bb->setInNestedDivergentBranch(true);
2979+
}
2980+
}
2981+
29532982
G4_INST* lastInst = bb->back();
29542983
if (lastInst->opcode() == G4_goto && !lastInst->asCFInst()->isBackward())
29552984
{
@@ -2959,7 +2988,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
29592988
bool isUniform = lastInst->getExecSize() == 1 || lastInst->getPredicate() == NULL;
29602989

29612990
if (isUniform && doScalarJmp &&
2962-
(activeJoinBlocks.size() == 0 || activeJoinBlocks.front().first->getId() > gotoTargetBB->getId()))
2991+
(activeJoinBlocks.size() == 0 || activeJoinBlocks.front().BB->getId() > gotoTargetBB->getId()))
29632992
{
29642993
// can convert goto into a scalar jump to UIP, if the jmp will not make us skip any joins
29652994
// CFG itself does not need to be updated
@@ -2970,7 +2999,7 @@ void FlowGraph::processGoto(bool HasSIMDCF)
29702999
//set goto JIP to the first active block
29713000
uint8_t eSize = lastInst->getExecSize() > 1 ? lastInst->getExecSize() : pKernel->getSimdSize();
29723001
addBBToActiveJoinList(activeJoinBlocks, gotoTargetBB, eSize);
2973-
G4_BB* joinBlock = activeJoinBlocks.front().first;
3002+
G4_BB* joinBlock = activeJoinBlocks.front().BB;
29743003
if (lastInst->getExecSize() == 1)
29753004
{ // For simd1 goto, convert it to a goto with the right execSize.
29763005
lastInst->setExecSize(eSize);

visa/FlowGraph.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,21 @@ class G4_BB
284284
// if the block is under simd flow control
285285
bool inSimdFlow;
286286

287+
// [HW WA]
288+
// If the block does not post-dominate the entry, it is considered
289+
// as divergent BB. Within a divergent BB, a further divergence is
290+
// considered as nested divergent.
291+
//
292+
// [Formal def] define root_1_divergentBB (level 1 divergent root)
293+
// to be BBs that
294+
// 1. not pdom(BB, entry); and
295+
// 2. There exists P, so that idom(P, BB) && pdom(P, entry)
296+
// A BB is in a nested divergent branch if there is a root_1_divergentBB,
297+
// say B1, such that dom(B1, BB) && not pdom(BB, B1).
298+
//
299+
// This is set in processGoto().
300+
bool inNestedDivergentBranch;
301+
287302
// the physical pred/succ for this block (i.e., the pred/succ for this block in the BB list)
288303
// Note that some transformations may rearrange BB layout, so for safety it's best to recompute
289304
// this
@@ -369,7 +384,8 @@ class G4_BB
369384
traversal(0), idom(NULL), beforeCall(NULL),
370385
afterCall(NULL), calleeInfo(NULL), BBType(G4_BB_NONE_TYPE),
371386
inNaturalLoop(false), hasSendInBB(false), loopNestLevel(0), scopeID(0),
372-
inSimdFlow(false), physicalPred(NULL), physicalSucc(NULL), parent(fg),
387+
inSimdFlow(false), inNestedDivergentBranch(false),
388+
physicalPred(NULL), physicalSucc(NULL), parent(fg),
373389
instList(alloc)
374390
{
375391
}
@@ -415,6 +431,9 @@ class G4_BB
415431
void setSendInBB(bool val) { hasSendInBB = val; }
416432
bool isSendInBB() { return hasSendInBB; }
417433

434+
// Sets the inNestedDivergentBranch flag of this block to `val`.
void setInNestedDivergentBranch(bool val) { inNestedDivergentBranch = val; }
435+
// Returns the current value of the inNestedDivergentBranch flag of this block.
bool isInNestedDivergentBranch() const { return inNestedDivergentBranch; }
436+
418437
void setNestLevel() {loopNestLevel ++;}
419438
unsigned char getNestLevel() {return loopNestLevel;}
420439
void resetNestLevel() { loopNestLevel = 0; }

visa/Gen4_IR.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3309,6 +3309,22 @@ G4_INST::isComprInvariantSrcRegion(G4_SrcRegRegion* src, int srcPos)
33093309
}
33103310
}
33113311

3312+
// Returns true when this instruction is treated as a partial write:
// it carries a predicate and is not a sel, or it is an smov.
//
// Exception (HW WA): when the target is not VISA_CM, a predicate with an anyh
// control whose group size is at least the kernel SIMD size is ignored for
// this check, i.e. the instruction is handled as if it had no predicate.
bool G4_INST::isPartialWrite() const
3313+
{
3314+
// Local copy so the predicate can be disregarded below without touching the instruction.
G4_Predicate* aPred = predicate;
3315+
if (aPred && G4_Predicate::isAnyH(aPred->getControl()) &&
3316+
builder.kernel.getOptions()->getTarget() != VISA_CM)
3317+
{
3318+
// Only for code from IGC. HW WA related.
3319+
if (aPred->getPredCtrlGroupSize() >= builder.kernel.getSimdSize())
3320+
{
3321+
// This is equivalent to NoMask
3322+
aPred = nullptr;
3323+
}
3324+
}
3325+
return (aPred != NULL && op != G4_sel) || op == G4_smov;
3326+
}
3327+
33123328
bool G4_INST::isAccSrcInst() const
33133329
{
33143330
if (srcs[0] && srcs[0]->isSrcRegRegion() && srcs[0]->asSrcRegRegion()->getBase()->isAccReg())

visa/Gen4_IR.hpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -685,10 +685,10 @@ class G4_INST
685685
uint32_t getLexicalId() const { return global_id; }
686686
void setLexicalId(uint32_t id) { global_id = id; }
687687
void setPredicate(G4_Predicate* p);
688-
G4_Predicate* getPredicate() const {return predicate;}
689-
void setSaturate(bool s) {sat = s;}
690-
bool getSaturate() const {return sat;}
691-
G4_opcode opcode() const {return op;}
688+
G4_Predicate* getPredicate() const { return predicate; }
689+
void setSaturate(bool s) { sat = s; }
690+
bool getSaturate() const { return sat; }
691+
G4_opcode opcode() const { return op; }
692692

693693
void setOpcode(G4_opcode opcd);
694694

@@ -730,7 +730,7 @@ class G4_INST
730730
bool isSplitSend() const { return op == G4_sends || op == G4_sendsc; }
731731

732732
// ToDo: get rid of these functions which don't make sense for non-sends
733-
virtual bool isEOT() const { return false;}
733+
virtual bool isEOT() const { return false; }
734734
virtual G4_SendMsgDescriptor* getMsgDesc() const { return nullptr; }
735735

736736
virtual bool mayExceedTwoGRF() const
@@ -741,10 +741,6 @@ class G4_INST
741741
virtual void computeRightBound(G4_Operand* opnd);
742742

743743
bool isWait() const { return op == G4_wait; }
744-
bool isPartialWrite() const
745-
{
746-
return (predicate != NULL && op != G4_sel) || op == G4_smov;
747-
}
748744
bool isSWSBSync() const
749745
{
750746
return op == G4_sync_nop || op == G4_sync_allrd || op == G4_sync_allwr;
@@ -755,6 +751,7 @@ class G4_INST
755751
return op == G4_pseudo_and || op == G4_pseudo_or || op == G4_pseudo_xor || op == G4_pseudo_not;
756752
}
757753

754+
bool isPartialWrite() const;
758755
bool isArithAddr() const;
759756
bool isMovAddr() const;
760757
bool isAccSrcInst() const;
@@ -3257,6 +3254,7 @@ class G4_Predicate final : public G4_Operand
32573254
G4_PredState getState() const { return state; }
32583255
void setState(G4_PredState s) { state = s; }
32593256
G4_Predicate_Control getControl() const { return control; }
3257+
// Overwrites this predicate's control field with PredCtrl.
void setControl(G4_Predicate_Control PredCtrl) { control = PredCtrl; }
32603258
bool samePredicate(const G4_Predicate& prd) const;
32613259
void emit(std::ostream& output, bool symbolreg = false) override;
32623260

0 commit comments

Comments
 (0)