Skip to content

Commit 1174017

Browse files
bcheng0127igcbot
authored and committed
SWSB: Reduce compilation time of setSendOpndMayKilled
If no special case needs to be considered, the register overlap check can be simplified to sharing the same bucket. At the same time, during the first scan in SBDDD, the GRFs touched by the current BB can be recorded.
1 parent a6b703d commit 1174017

File tree

3 files changed

+143
-2
lines changed

3 files changed

+143
-2
lines changed

visa/HWCaps.inc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,4 +801,8 @@ bool WARLocalization() const {
801801
bool supportPureBF() const {
802802
return false;
803803
}
804+
805+
bool hasReadSuppressionOrSharedLocalMemoryWAs() const {
806+
return true;
807+
}
804808
// end HW capabilities

visa/LocalScheduler/SWSB_G4IR.cpp

Lines changed: 127 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1493,6 +1493,58 @@ void SWSB::handleFuncCall() {
14931493
}
14941494
}
14951495

1496+
//
1497+
// Set the global ID bit vector of each bucket touched by corresponding
1498+
// operands
1499+
//
1500+
void SWSB::SWSBInitializeGlobalNodesInBuckets(
1501+
std::vector<SparseBitVector>
1502+
&dstGlobalIDs, // node global ID bit vector in buckets touched by dst
1503+
// operands
1504+
std::vector<SparseBitVector>
1505+
&srcGlobalIDs, // node global ID bit vector in buckets touched by src
1506+
// operands
1507+
LiveGRFBuckets &globalSendsLB) { // live buckets of global sends
1508+
1509+
dstGlobalIDs.resize(kernel.getNumRegTotal() +
1510+
fg.builder->getNumScalarRegisters());
1511+
srcGlobalIDs.resize(kernel.getNumRegTotal() +
1512+
fg.builder->getNumScalarRegisters());
1513+
1514+
// Scan the global send LB to set the node bit in dst or src bit set of each
1515+
// bucket that node touches.
1516+
for (unsigned curBucket = 0;
1517+
curBucket <
1518+
kernel.getNumRegTotal() + fg.builder->getNumScalarRegisters();
1519+
curBucket++) {
1520+
SparseBitVector dstBitSet;
1521+
SparseBitVector srcBitSet;
1522+
1523+
bool setDstBucket = false;
1524+
bool setSrcBucket = false;
1525+
for (LiveGRFBuckets::BN_iterator bn_it = globalSendsLB.begin(curBucket);
1526+
bn_it != globalSendsLB.end(curBucket); ++bn_it) {
1527+
SBBucketNode *liveBN = (*bn_it);
1528+
SBNode *curLiveNode = liveBN->node;
1529+
1530+
if (liveBN->opndNum == Opnd_dst) {
1531+
dstBitSet.set(curLiveNode->globalID);
1532+
setDstBucket = true;
1533+
} else {
1534+
srcBitSet.set(curLiveNode->globalID);
1535+
setSrcBucket = true;
1536+
}
1537+
}
1538+
1539+
if (setDstBucket) {
1540+
dstGlobalIDs[curBucket] = dstBitSet;
1541+
}
1542+
if (setSrcBucket) {
1543+
srcGlobalIDs[curBucket] = srcBitSet;
1544+
}
1545+
}
1546+
}
1547+
14961548
void SWSB::SWSBGlobalTokenGenerator(PointsToAnalysis &p, LiveGRFBuckets &LB,
14971549
LiveGRFBuckets &globalSendsLB) {
14981550
allTokenNodesMap.resize(totalTokenNum);
@@ -1505,6 +1557,18 @@ void SWSB::SWSBGlobalTokenGenerator(PointsToAnalysis &p, LiveGRFBuckets &LB,
15051557
const bool enableDistPropTokenAllocation =
15061558
fg.builder->getOptions()->getOption(vISA_DistPropTokenAllocation);
15071559

1560+
std::vector<SparseBitVector> dstGlobalIDs;
1561+
std::vector<SparseBitVector> srcGlobalIDs;
1562+
if (!fg.builder
1563+
->hasReadSuppressionOrSharedLocalMemoryWAs() && // No WA needs check
1564+
// specific
1565+
// instruction
1566+
!indexes.DPASIndex) { // No DPAS instruction which may access part of GRF
1567+
// Initialize for setSendGlobalIDMayKilledByCurrentBB only
1568+
SWSBInitializeGlobalNodesInBuckets(dstGlobalIDs, srcGlobalIDs,
1569+
globalSendsLB);
1570+
}
1571+
15081572
// Get the live out, may kill bit sets
15091573
for (G4_BB_SB *bb : BBVector) {
15101574
bb->liveInTokenNodes = BitSet(SBSendNodes.size(), false);
@@ -1537,7 +1601,15 @@ void SWSB::SWSBGlobalTokenGenerator(PointsToAnalysis &p, LiveGRFBuckets &LB,
15371601
}
15381602
}
15391603

1540-
bb->setSendOpndMayKilled(&globalSendsLB, SBNodes, p);
1604+
// No WA needs check specific instruction
1605+
// No DPAS instruction which may access part of GRF
1606+
if (!fg.builder->hasReadSuppressionOrSharedLocalMemoryWAs() &&
1607+
!indexes.DPASIndex) {
1608+
bb->setSendGlobalIDMayKilledByCurrentBB(dstGlobalIDs, srcGlobalIDs,
1609+
SBNodes, p);
1610+
} else {
1611+
bb->setSendOpndMayKilled(&globalSendsLB, SBNodes, p);
1612+
}
15411613

15421614
#ifdef DEBUG_VERBOSE_ON
15431615
bb->dumpLiveInfo(&globalSendOpndList, globalSendNum, nullptr);
@@ -5107,6 +5179,43 @@ void G4_BB_SB::setSendOpndMayKilled(LiveGRFBuckets *globalSendsLB,
51075179
}
51085180
}
51095181

5182+
//
5183+
// Set the global ID of the send node which will be killed by the current basic
5184+
// block. This is the preparation work for reaching-definition data flow analysis.
5185+
// The function is used to handle kernels in which only send instructions need
5186+
// SBID. The smallest GRF granularity of a send operand is 1 GRF, and it is GRF
5187+
// aligned. So a node in the bucket is a node that will be killed by the current BB.
5188+
//
5189+
void G4_BB_SB::setSendGlobalIDMayKilledByCurrentBB(
5190+
std::vector<SparseBitVector> &dstTokenBit,
5191+
std::vector<SparseBitVector> &srcTokenBit, SBNODE_VECT &SBNodes,
5192+
PointsToAnalysis &p) {
5193+
5194+
if (first_node == INVALID_ID) {
5195+
return;
5196+
}
5197+
5198+
for (auto srcGRF : BBGRF.src) {
5199+
// RAW
5200+
if (!dstTokenBit[srcGRF].empty()) {
5201+
send_may_kill.dst |= dstTokenBit[srcGRF];
5202+
}
5203+
}
5204+
5205+
for (auto dstGRF : BBGRF.dst) {
5206+
if (!dstTokenBit[dstGRF].empty()) {
5207+
//WAW
5208+
send_may_kill.dst |= dstTokenBit[dstGRF];
5209+
send_WAW_may_kill |= dstTokenBit[dstGRF];
5210+
}
5211+
5212+
if (!srcTokenBit[dstGRF].empty()) {
5213+
//WAR
5214+
send_may_kill.src |= srcTokenBit[dstGRF];
5215+
}
5216+
}
5217+
}
5218+
51105219
bool G4_BB_SB::getFootprintForOperand(SBNode *node, G4_INST *inst,
51115220
G4_Operand *opnd,
51125221
Gen4_Operand_Number opndNum) {
@@ -6725,6 +6834,15 @@ void G4_BB_SB::SBDDD(G4_BB *bb, LiveGRFBuckets *&LB,
67256834
bucketNodes[BD.footprint].push_back(newNode);
67266835
LB->add(newNode, BD.bucket);
67276836
}
6837+
6838+
if (BD.bucket < (int)(builder.kernel.getNumRegTotal() +
6839+
builder.getNumScalarRegisters())) {
6840+
if (BD.opndNum == Opnd_dst) {
6841+
BBGRF.setDst(BD.bucket, true);
6842+
} else {
6843+
BBGRF.setSrc(BD.bucket, true);
6844+
}
6845+
}
67286846
}
67296847
} else {
67306848
std::vector<SBBucketNode *> bucketNodes(
@@ -6738,6 +6856,14 @@ void G4_BB_SB::SBDDD(G4_BB *bb, LiveGRFBuckets *&LB,
67386856
}
67396857

67406858
LB->add(bucketNodes[BD.opndNum], BD.bucket);
6859+
if (BD.bucket < (int)(builder.kernel.getNumRegTotal() +
6860+
builder.getNumScalarRegisters())) {
6861+
if (BD.opndNum == Opnd_dst) {
6862+
BBGRF.setDst(BD.bucket, true);
6863+
} else {
6864+
BBGRF.setSrc(BD.bucket, true);
6865+
}
6866+
}
67416867
}
67426868
}
67436869

visa/LocalScheduler/SWSB_G4IR.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,8 @@ class G4_BB_SB {
702702
unsigned *tokenLiveInDist;
703703
unsigned *tokenLiveOutDist;
704704
SBBitSets localReachingSends;
705+
SBBitSets
706+
BBGRF; // Is used to record the GRF registers accessed by each basic block
705707

706708
SBBUCKET_VECTOR
707709
globalARSendOpndList; // All send source operands which live out their
@@ -799,6 +801,11 @@ class G4_BB_SB {
799801
// Global SBID dependence analysis
800802
void setSendOpndMayKilled(LiveGRFBuckets *globalSendsLB, SBNODE_VECT &SBNodes,
801803
PointsToAnalysis &p);
804+
void
805+
setSendGlobalIDMayKilledByCurrentBB(std::vector<SparseBitVector> &dstTokenBit,
806+
std::vector<SparseBitVector> &srcTokenBit,
807+
SBNODE_VECT &SBNodes, PointsToAnalysis &p);
808+
802809
void dumpTokenLiveInfo(SBNODE_VECT *SBSendNodes);
803810
void getLiveBucketsFromFootprint(const SBFootprint *firstFootprint,
804811
SBBucketNode *sBucketNode,
@@ -955,7 +962,6 @@ class SWSB {
955962
uint32_t AWSyncAllCount = 0;
956963
uint32_t tokenReuseCount = 0;
957964

958-
bool hasFCall = false;
959965
// Linear scan data structures for token allocation
960966
SBNODE_LIST linearScanLiveNodes;
961967

@@ -1075,6 +1081,11 @@ class SWSB {
10751081
INST_LIST_ITER inst_it, int newInstID, BitSet *dstTokens,
10761082
BitSet *srcTokens, bool &keepDst, bool removeAllToken);
10771083

1084+
void
1085+
SWSBInitializeGlobalNodesInBuckets(std::vector<SparseBitVector> &dstGlobalIDs,
1086+
std::vector<SparseBitVector> &srcGlobalIDs,
1087+
LiveGRFBuckets &globalSendsLB);
1088+
10781089
void SWSBDepDistanceGenerator(PointsToAnalysis &p, LiveGRFBuckets &LB,
10791090
LiveGRFBuckets &globalSendsLB);
10801091
void handleFuncCall();

0 commit comments

Comments
 (0)