Skip to content

Commit e5df3df

Browse files
bcheng0127igcbot
authored andcommitted
Fix missing SWSB dependence on an undefined variable
In "cmp.eq v5444, v5444", v5444 is undefined and may be assigned any register. As a result, there may be a RAW dependence along the SIMD control flow. The current SWSB only tracks WAR and WAW dependences along the SIMD control flow.
1 parent 2f8b563 commit e5df3df

File tree

7 files changed

+71
-3
lines changed

7 files changed

+71
-3
lines changed

visa/G4_Declare.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ class G4_Declare {
9393
uint16_t addrSpillFill : 1;
9494
uint16_t forceSpilled : 1;
9595
uint16_t exclusiveLoad : 1;
96+
uint16_t isCmpUseOnly : 1;
9697

9798
unsigned declId; // global decl id for this builder
9899

@@ -330,6 +331,9 @@ class G4_Declare {
330331
void setPreDefinedVar(bool b) { PreDefinedVar = b; }
331332
bool isPreDefinedVar() const { return PreDefinedVar; }
332333

334+
void setIsCmpUseOnly(bool b) { isCmpUseOnly = b; }
335+
bool getIsCmpUseOnly() const { return isCmpUseOnly; }
336+
333337
unsigned getNumRegNeeded() const;
334338

335339
void emit(std::ostream &output) const;

visa/G4_IR.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5154,6 +5154,7 @@ G4_Declare::G4_Declare(const IR_Builder &builder, const char *n,
51545154
addrSpillFill = false;
51555155
forceSpilled = false;
51565156
exclusiveLoad = false;
5157+
isCmpUseOnly = false;
51575158
scopeID = 0;
51585159

51595160
declId = (unsigned)dcllist.size();

visa/GraphColor.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1405,6 +1405,7 @@ class GlobalRA {
14051405
static const RAVarInfo defaultValues;
14061406
std::vector<RAVarInfo> vars;
14071407
std::vector<G4_Declare *> UndeclaredVars;
1408+
std::vector<G4_Declare *> UndefinedCmpVars;
14081409

14091410
// fake declares for each GRF reg, used by HRA
14101411
// note only GRFs that are used by LRA get a declare
@@ -1617,6 +1618,7 @@ class GlobalRA {
16171618
void addSpillCodeInBB(G4_BB *bb) { BBsWithSpillCode.insert(bb); }
16181619

16191620
void addUndefinedDcl(G4_Declare *dcl) { UndeclaredVars.push_back(dcl); }
1621+
void addUndefinedCmpDcl(G4_Declare *dcl) { UndefinedCmpVars.push_back(dcl); }
16201622

16211623
bool isUndefinedDcl(const G4_Declare *dcl) const {
16221624
return std::find(UndeclaredVars.begin(), UndeclaredVars.end(), dcl) !=
@@ -1689,6 +1691,7 @@ class GlobalRA {
16891691
setBBId(dcl, UINT_MAX);
16901692
resetLocalLR(dcl);
16911693
}
1694+
UndefinedCmpVars.clear();
16921695
}
16931696

16941697
void clearLocalLiveRanges() {
@@ -2050,6 +2053,7 @@ class GlobalRA {
20502053
void addCalleeSavePseudoCode();
20512054
void addStoreRestoreToReturn();
20522055
void storeCEInProlog();
2056+
void setUndefinedVarCmp();
20532057
void markGraphBlockLocalVars();
20542058
void verifyRA(LivenessAnalysis &liveAnalysis);
20552059
void verifySpillFill();

visa/LocalRA.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ class LocalLiveRange {
135135

136136
bool assigned;
137137
bool isSplit;
138+
bool defined;
138139

139140
IR_Builder &builder;
140141

@@ -153,6 +154,7 @@ class LocalLiveRange {
153154
assigned = false;
154155
eot = false;
155156
isSplit = false;
157+
defined = false;
156158

157159
if (!builder.canWriteR0())
158160
addForbidden(0);
@@ -224,6 +226,9 @@ class LocalLiveRange {
224226
void markSplit() { isSplit = true; }
225227
bool getSplit() const { return isSplit; }
226228

229+
void markDefined() { defined = true; }
230+
bool isDefined() const { return defined; }
231+
227232
void addForbidden(unsigned int f) { forbiddenGRFs.insert(f); }
228233
std::unordered_set<unsigned int> &getForbidden() { return forbiddenGRFs; }
229234
};

visa/LocalScheduler/SWSB_G4IR.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,6 +1043,8 @@ SBFootprint *G4_BB_SB::getFootprintForGRF(G4_Operand *opnd,
10431043
GenPrecision precision = GenPrecision::INVALID;
10441044
bool isFcvtByteType = false;
10451045
bool isPrecision = false;
1046+
bool isCmpUseOnly = inst->opcode() == G4_cmp && opnd_num == Opnd_src0 &&
1047+
opnd->getTopDcl()->getIsCmpUseOnly();
10461048

10471049
if (inst->opcode() == G4_fcvt &&
10481050
(IS_BTYPE(type) ||
@@ -1175,7 +1177,7 @@ SBFootprint *G4_BB_SB::getFootprintForGRF(G4_Operand *opnd,
11751177
SBFootprint(GRF_T, precision, LB, RB, inst, isFcvtByteType)
11761178
: new (allocedMem)
11771179
SBFootprint(GRF_T, type, LB, RB, inst, isFcvtByteType);
1178-
1180+
footprint->isCmpUseOnly = isCmpUseOnly;
11791181
return footprint;
11801182
}
11811183

@@ -5600,6 +5602,9 @@ void G4_BB_SB::setSendOpndMayKilled(SBNODE_VECT &SBNodes, PointsToAnalysis &p,
56005602
send_may_kill.src.set(globalID);
56015603
} else if (dep == RAW) {
56025604
send_may_kill.dst.set(globalID);
5605+
if (curFootprint->isCmpUseOnly) {
5606+
send_WAW_may_kill.set(globalID);
5607+
}
56035608
// Exclusive WAW has no overlap
56045609
} else if (dep == WAW && (isSend || !isDclExclusiveLoad(
56055610
nodeInfo.topDeclare, topDcl))) {
@@ -8001,9 +8006,20 @@ void SWSB::addGlobalDependence(unsigned globalSendNum,
80018006
unsigned short internalOffset = 0;
80028007
bool hasOverlap =
80038008
curFootprint->hasOverlap(liveFootprint, internalOffset);
8009+
DepType dep = getDepForOpnd(liveOpnd, curOpnd);
80048010

8011+
// The following special dependence check handles this case in SIMD
8012+
// control flow:
8013+
// cmp.eq v5444, v5444
8014+
// where v5444 is undefined and can be assigned any register. As
8015+
// a result there may be a RAW dependence in the SIMD control flow.
8016+
// For this special case, we treat it as a WAW dependence by checking
8017+
// whether it is an uninitialized declare used in cmp.
8018+
if (!afterWrite && liveOpnd == Opnd_dst &&
8019+
curFootprint->isCmpUseOnly) {
8020+
dep = getDepForOpnd(liveOpnd, Opnd_dst);
8021+
}
80058022
// Find DEP type
8006-
DepType dep = getDepForOpnd(liveOpnd, curOpnd);
80078023
if (!hasOverlap && dep == RAW) {
80088024
hasOverlap =
80098025
sb_bb->hasExtraOverlap(liveInst, curInst, liveFootprint,

visa/LocalScheduler/SWSB_G4IR.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,8 @@ struct SBFootprint {
141141
const unsigned short RightB;
142142
unsigned short offset = 0;
143143
bool isPrecision = false;
144-
bool isFcvtByteType = false;;
144+
bool isFcvtByteType = false;
145+
bool isCmpUseOnly = false;
145146
G4_INST *inst;
146147

147148
// FIXME: The choice of C-style linked list seems suspect given that there are

visa/RegAlloc.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1923,6 +1923,25 @@ void GlobalRA::markBlockLocalVars() {
19231923
it++) {
19241924
G4_INST *inst = *it;
19251925

1926+
// Check whether an undefined variable is used in a CMP instruction that
1927+
// is used to detect the execution mask.
1928+
// cmp.eq (M1, 16) P12 V0147(0,0)<0;1,0> V0147(0,0)<0;1,0>
1929+
if (inst->opcode() == G4_cmp) {
1930+
const bool isModEq =
1931+
inst->getCondMod() && inst->getCondMod()->getMod() == Mod_e;
1932+
const bool isNullDst = !inst->getDst() || inst->hasNULLDst();
1933+
const bool isSrc0SameAsSrc1 = inst->getSrc(0)->asSrcRegRegion() &&
1934+
inst->getSrc(1)->asSrcRegRegion() &&
1935+
*inst->getSrc(0)->asSrcRegRegion() ==
1936+
*inst->getSrc(1)->asSrcRegRegion();
1937+
if (isModEq && isNullDst && isSrc0SameAsSrc1) {
1938+
G4_Declare *topdcl = GetTopDclFromRegRegion(inst->getSrc(0));
1939+
if (topdcl && topdcl->getRegFile() == G4_GRF) {
1940+
addUndefinedCmpDcl(topdcl);
1941+
}
1942+
}
1943+
}
1944+
19261945
// Track direct dst references.
19271946

19281947
G4_DstRegRegion *dst = inst->getDst();
@@ -1945,6 +1964,7 @@ void GlobalRA::markBlockLocalVars() {
19451964
lr->setFirstRef(inst, 0);
19461965
}
19471966
lr->recordRef(bb);
1967+
lr->markDefined();
19481968
recordRef(topdcl);
19491969
}
19501970
}
@@ -2087,6 +2107,20 @@ bool GlobalRA::canSkipFDE() const {
20872107
return !kernel.fg.getHasStackCalls() && kernel.getOption(vISA_skipFDE);
20882108
}
20892109

2110+
void GlobalRA::setUndefinedVarCmp() {
2111+
// Iterate over all dcls recorded in UndefinedCmpVars (GRF sources of a
2112+
// cmp.eq with null dst and identical src0/src1) and flag those whose
2113+
// local live range was never marked as defined.
2114+
2115+
// Mark each such dcl as cmp-use-only if its local live range has no def
2116+
for (auto dcl : UndefinedCmpVars) {
2117+
LocalLiveRange *lr = getLocalLR(dcl);
2118+
if (!lr->isDefined()) {
2119+
dcl->setIsCmpUseOnly(true);
2120+
}
2121+
}
2122+
}
2123+
20902124
//
20912125
// Mark block local (temporary) variables.
20922126
//
@@ -2097,6 +2131,9 @@ void GlobalRA::markGraphBlockLocalVars() {
20972131
// Create live ranges and record the reference info
20982132
markBlockLocalVars();
20992133

2134+
// Set undefined variable used in cmp
2135+
setUndefinedVarCmp();
2136+
21002137
VISA_DEBUG_VERBOSE({
21012138
std::cout << "\t--LOCAL VARIABLES--\n";
21022139
for (auto dcl : kernel.Declares) {

0 commit comments

Comments
 (0)