Skip to content

Commit a6b703d

Browse files
bcheng0127 authored and igcbot committed
Use the precision as the data type of DPAS src1 and src2
For DPAS src1 and src2, the operand data type is a placeholder; only the precision accurately describes the data type.
1 parent 69a1364 commit a6b703d

File tree

5 files changed

+90
-52
lines changed

5 files changed

+90
-52
lines changed

visa/G4_IR.hpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,19 @@ class G4_InstDpas : public G4_INST {
990990
return GenPrecisionTable[(int)P].BitSize;
991991
}
992992

993+
// Returns true when two DPAS operand precisions are considered the same.
// Besides exact equality, U8 and S8 match each other, and any two of
// U4/S4/U2/S2 match each other. Every other precision matches only itself.
static bool hasSamePrecision(GenPrecision p1, GenPrecision p2) {
  // Maps a precision to its compatibility family; 0 means the precision has
  // no family and therefore only compares equal to itself.
  auto familyOf = [](GenPrecision p) {
    switch (p) {
    case GenPrecision::U8:
    case GenPrecision::S8:
      return 1;
    case GenPrecision::U4:
    case GenPrecision::S4:
    case GenPrecision::U2:
    case GenPrecision::S2:
      return 2;
    default:
      return 0;
    }
  };

  if (p1 == p2) {
    return true;
  }

  const int family = familyOf(p1);
  return family != 0 && family == familyOf(p2);
}
1005+
9931006
G4_InstDpas(const IR_Builder &builder, G4_opcode o, G4_ExecSize size,
9941007
G4_DstRegRegion *d, G4_Operand *s0, G4_Operand *s1,
9951008
G4_Operand *s2, G4_Operand *s3, G4_InstOpts opt, GenPrecision a,
@@ -1015,6 +1028,14 @@ class G4_InstDpas : public G4_INST {
10151028
GenPrecision getSrc1Precision() const { return Src1Precision; }
10161029
GenPrecision getSrc2Precision() const { return Src2Precision; }
10171030

1031+
// Reports whether precision p is interchangeable with this instruction's
// src1 precision, as decided by hasSamePrecision().
bool hasSameSrc1Precision(GenPrecision p) const {
  const GenPrecision own = Src1Precision;
  return G4_InstDpas::hasSamePrecision(own, p);
}
1034+
1035+
// Reports whether precision p is interchangeable with this instruction's
// src2 precision, as decided by hasSamePrecision().
bool hasSameSrc2Precision(GenPrecision p) const {
  const GenPrecision own = Src2Precision;
  return G4_InstDpas::hasSamePrecision(own, p);
}
1038+
10181039
void setRepeatCount(uint8_t rc) { RepeatCount = rc; }
10191040
// data size per lane (data size per each systolic depth)
10201041
uint32_t getPrecisionSizePerLaneInByte(GenPrecision P) const {

visa/LocalScheduler/LocalScheduler_G4IR.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,24 +1183,32 @@ bool DDD::hasReadSuppression(G4_INST *prevInst, G4_INST *nextInst,
11831183
}
11841184

11851185

1186-
bool DDD::hsaSameTypesAllOperands(const G4_INST &curInst,
1187-
const G4_INST &nextInst) const {
1186+
bool DDD::DPASHasSameTypesAllOperands(const G4_INST &curInst,
1187+
const G4_INST &nextInst) const {
1188+
vASSERT(curInst.isDpas() && nextInst.isDpas());
11881189
vASSERT(curInst.getNumDst() == 1 &&
11891190
curInst.getNumDst() == nextInst.getNumDst());
1191+
vASSERT(curInst.getNumSrc() == nextInst.getNumSrc());
1192+
11901193
if (curInst.getDst()->getType() != nextInst.getDst()->getType())
11911194
return false;
11921195

1193-
vASSERT(curInst.getNumSrc() == nextInst.getNumSrc());
1194-
for (auto i = 0; i < curInst.getNumSrc(); ++i)
1195-
if (curInst.getSrc(i)->getType() != nextInst.getSrc(i)->getType())
1196-
return false;
1196+
if (curInst.getSrc(0)->getType() != nextInst.getSrc(0)->getType())
1197+
return false;
1198+
1199+
if (!curInst.asDpasInst()->hasSameSrc1Precision(
1200+
nextInst.asDpasInst()->getSrc1Precision()) ||
1201+
!curInst.asDpasInst()->hasSameSrc2Precision(
1202+
nextInst.asDpasInst()->getSrc2Precision())) {
1203+
return false;
1204+
}
11971205

11981206
return true;
11991207
}
12001208

12011209
bool DDD::hasSameSourceOneDPAS(G4_INST *curInst, G4_INST *nextInst,
12021210
BitSet &liveDst, BitSet &liveSrc) const {
1203-
if (!hsaSameTypesAllOperands(*curInst, *nextInst))
1211+
if (!DPASHasSameTypesAllOperands(*curInst, *nextInst))
12041212
return false;
12051213

12061214
G4_InstDpas *curDpasInst = curInst->asDpasInst();

visa/LocalScheduler/LocalScheduler_G4IR.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,8 +276,8 @@ class DDD {
276276
private:
277277
bool hasSameSourceOneDPAS(G4_INST *curInst, G4_INST *nextInst,
278278
BitSet &liveDst, BitSet &liveSrc) const;
279-
bool hsaSameTypesAllOperands(const G4_INST &curInst,
280-
const G4_INST &nextInst) const;
279+
bool DPASHasSameTypesAllOperands(const G4_INST &curInst,
280+
const G4_INST &nextInst) const;
281281

282282
public:
283283
DDD(G4_BB *bb, const LatencyTable &lt, G4_Kernel *k, PointsToAnalysis &p);

visa/LocalScheduler/SWSB_G4IR.cpp

Lines changed: 40 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -181,29 +181,6 @@ static bool operandOverlap(G4_Operand *opnd1, G4_Operand *opnd2) {
181181
opnd2->getLinearizedEnd() > opnd1->getLinearizedStart());
182182
}
183183

184-
static G4_Type getDPASDataType(GenPrecision p) {
185-
switch (p) {
186-
case GenPrecision::U1:
187-
case GenPrecision::U2:
188-
case GenPrecision::U4:
189-
case GenPrecision::U8:
190-
return Type_UB;
191-
case GenPrecision::S2:
192-
case GenPrecision::S4:
193-
case GenPrecision::S8:
194-
return Type_B;
195-
case GenPrecision::FP16:
196-
return Type_HF;
197-
case GenPrecision::BF16:
198-
return Type_BF;
199-
case GenPrecision::TF32:
200-
return Type_UNDEF;
201-
default:
202-
vISA_ASSERT_UNREACHABLE("illegal Operand Precision");
203-
return Type_UD;
204-
}
205-
}
206-
207184
bool SBFootprint::hasOverlap(const SBFootprint *liveFootprint,
208185
unsigned short &internalOffset) const {
209186
for (const SBFootprint *curFootprintPtr = this; curFootprintPtr;
@@ -232,7 +209,8 @@ bool SBFootprint::hasOverlap(const SBFootprint *liveFootprint,
232209
for (const SBFootprint *curFootprintPtr = this; curFootprintPtr;
233210
curFootprintPtr = curFootprintPtr->next) {
234211
FOOTPRINT_TYPE curFType = curFootprintPtr->fType;
235-
G4_Type curType = curFootprintPtr->type;
212+
const unsigned short curType = curFootprintPtr->type;
213+
bool isPrecision = curFootprintPtr->isPrecision;
236214
for (const SBFootprint *curFootprint2Ptr = liveFootprint; curFootprint2Ptr;
237215
curFootprint2Ptr = curFootprint2Ptr->next) {
238216
// Negative of no overlap: !(LeftB > curFootprint2Ptr->RightB || RightB
@@ -241,11 +219,11 @@ bool SBFootprint::hasOverlap(const SBFootprint *liveFootprint,
241219
if (curFootprintPtr->LeftB <= curFootprint2Ptr->RightB &&
242220
curFootprintPtr->RightB >= curFootprint2Ptr->LeftB) {
243221
internalOffset = curFootprint2Ptr->offset;
244-
if (curFType == GRF_T && IS_BTYPE(curType)) {
222+
if (curFType == GRF_T && !isPrecision && IS_BTYPE(curType)) {
245223
isRMWOverlap = true;
246224
}
247225
return true;
248-
} else if (curFType == GRF_T && IS_BTYPE(curType)) {
226+
} else if (curFType == GRF_T && !isPrecision && IS_BTYPE(curType)) {
249227
unsigned short w_LeftB = curFootprintPtr->LeftB / 2;
250228
unsigned short w_RightB = curFootprintPtr->RightB / 2;
251229
unsigned short w_curLeftB = curFootprint2Ptr->LeftB / 2;
@@ -318,6 +296,17 @@ bool SBFootprint::isWholeOverlap(const SBFootprint *liveFootprint) const {
318296
return findOverlap;
319297
}
320298

299+
bool SBFootprint::hasSameType(const SBFootprint *liveFootprint) const {
300+
if (isPrecision != liveFootprint->isPrecision) {
301+
return false;
302+
}
303+
if (isPrecision) {
304+
return G4_InstDpas::hasSamePrecision((GenPrecision)type,
305+
(GenPrecision)liveFootprint->type);
306+
}
307+
return type == liveFootprint->type;
308+
}
309+
321310
// check if the current footprint has the same range with given one, or they
322311
// are not overlapped at all
323312
bool SBFootprint::isSameOrNoOverlap(const SBFootprint *liveFootprint) const {
@@ -1013,6 +1002,9 @@ SBFootprint *G4_BB_SB::getFootprintForGRF(G4_Operand *opnd,
10131002
unsigned short RB = 0;
10141003
int aregOffset = totalGRFNum;
10151004
G4_Type type = opnd->getType();
1005+
GenPrecision precision = GenPrecision::INVALID;
1006+
bool isPrecision = false;
1007+
10161008
if (inst->opcode() == G4_fcvt &&
10171009
(IS_BTYPE(type) ||
10181010
(type == Type_UD && builder.hasPartialInt64Support()))) {
@@ -1023,11 +1015,12 @@ SBFootprint *G4_BB_SB::getFootprintForGRF(G4_Operand *opnd,
10231015
}
10241016

10251017
if (inst->isDpas() && (opnd_num == Opnd_src1 || opnd_num == Opnd_src2)) {
1018+
isPrecision = true;
10261019
if (opnd_num == Opnd_src1) {
1027-
type = getDPASDataType(inst->asDpasInst()->getSrc1Precision());
1020+
precision = inst->asDpasInst()->getSrc1Precision();
10281021
}
10291022
if (opnd_num == Opnd_src2) {
1030-
type = getDPASDataType(inst->asDpasInst()->getSrc2Precision());
1023+
precision = inst->asDpasInst()->getSrc2Precision();
10311024
}
10321025
}
10331026

@@ -1119,6 +1112,7 @@ SBFootprint *G4_BB_SB::getFootprintForGRF(G4_Operand *opnd,
11191112
}
11201113

11211114
SBFootprint *footprint =
1115+
isPrecision ? new (allocedMem) SBFootprint(GRF_T, precision, LB, RB, inst) :
11221116
new (allocedMem) SBFootprint(GRF_T, type, LB, RB, inst);
11231117

11241118
return footprint;
@@ -5551,10 +5545,14 @@ void G4_BB_SB::setDistance(const SBFootprint *footprint, SBNode *node,
55515545
SBDISTDEP_ITEM depItem;
55525546
depItem.liveNodePipe = liveNode->ALUPipe;
55535547
depItem.nodePipe = node->ALUPipe;
5554-
depItem.operandType = getDataTypePipeXe(builder, footprint->type);
55555548
depItem.dstDep = dstDep;
55565549
if (node->GetInstruction()->isSend()) {
55575550
depItem.operandType = PIPE_SEND;
5551+
} else if (node->GetInstruction()->isDpas()) {
5552+
depItem.operandType = PIPE_DPAS;
5553+
} else { //Precision is only used in DPAS
5554+
depItem.operandType =
5555+
getDataTypePipeXe(builder, (G4_Type)footprint->type);
55585556
}
55595557
vISA_ASSERT(currentID > prevID, "Wrong node ALU ID");
55605558
unsigned distance = node->setDistance(currentID - prevID);
@@ -5696,20 +5694,21 @@ bool G4_BB_SB::src2SameFootPrintDiffType(SBNode *curNode,
56965694
fp = fp->next) {
56975695
unsigned short leftB = fp->LeftB / builder.numEltPerGRF<Type_UB>();
56985696
unsigned short rightB = fp->RightB / builder.numEltPerGRF<Type_UB>();
5699-
G4_Type type = fp->type;
5697+
vASSERT(fp->isPrecision);
5698+
GenPrecision p = (GenPrecision)fp->type;
57005699

57015700
for (const SBFootprint *nextfp = nextNode->getFirstFootprint(Opnd_src2);
57025701
nextfp; nextfp = nextfp->next) {
57035702
unsigned short nextLeftB =
57045703
nextfp->LeftB / builder.numEltPerGRF<Type_UB>();
57055704
unsigned short nextRightB =
57065705
nextfp->RightB / builder.numEltPerGRF<Type_UB>();
5707-
G4_Type nextType = nextfp->type;
5706+
vASSERT(nextfp->isPrecision);
5707+
GenPrecision nextP = (GenPrecision)nextfp->type;
57085708

57095709
if (!(nextLeftB > rightB || nextRightB < leftB)) {
5710-
if (type != nextType) {
5711-
return true;
5712-
}
5710+
return !G4_InstDpas::hasSamePrecision((GenPrecision)p,
5711+
(GenPrecision)nextP);
57135712
}
57145713
}
57155714
}
@@ -5766,8 +5765,8 @@ bool G4_BB_SB::isLastDpas(SBNode *curNode, SBNode *nextNode)
57665765
{Opnd_src0, Opnd_src1, Opnd_src2, Opnd_dst}) {
57675766
if (curNode->getFirstFootprint(opndNum) &&
57685767
nextNode->getFirstFootprint(opndNum) &&
5769-
curNode->getFirstFootprint(opndNum)->type !=
5770-
nextNode->getFirstFootprint(opndNum)->type) {
5768+
!curNode->getFirstFootprint(opndNum)->hasSameType(
5769+
nextNode->getFirstFootprint(opndNum))) {
57715770
return true;
57725771
}
57735772
}
@@ -5790,13 +5789,16 @@ bool G4_BB_SB::isLastDpas(SBNode *curNode, SBNode *nextNode)
57905789
}
57915790

57925791
if (VISA_WA_CHECK(builder.getPWaTable(), Wa_16011859583) ||
5793-
VISA_WA_CHECK(builder.getPWaTable(), Wa_14012420496) ||
5794-
builder.getOption(vISA_NoDPASMacro)) {
5792+
VISA_WA_CHECK(builder.getPWaTable(), Wa_14012420496)) {
57955793
if (curC != 8 || nextC != 8) {
57965794
return true;
57975795
}
57985796
}
57995797

5798+
if (builder.getOption(vISA_NoDPASMacro)) {
5799+
return true;
5800+
}
5801+
58005802
if (builder.hasDpasSrc2ReadSupression() &&
58015803
builder.hasDpasSrc2ReadSupressionSameRegSameType() &&
58025804
src2SameFootPrintDiffType(curNode, nextNode)) {

visa/LocalScheduler/SWSB_G4IR.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,11 @@ typedef enum _FOOTPRINT_TYPE {
143143

144144
struct SBFootprint {
145145
const FOOTPRINT_TYPE fType;
146-
const G4_Type type;
146+
const unsigned short type;
147147
const unsigned short LeftB;
148148
const unsigned short RightB;
149149
unsigned short offset = 0;
150+
bool isPrecision = false;
150151
G4_INST *inst;
151152

152153
// FIXME: The choice of C-style linked list seems suspect given that there are
@@ -157,13 +158,18 @@ struct SBFootprint {
157158
struct SBFootprint *next = nullptr;
158159

159160
SBFootprint()
160-
: fType(GRF_T), type(Type_UNDEF), LeftB(0), RightB(0), inst(nullptr) {
161-
;
161+
: fType(GRF_T), type((unsigned short)Type_UNDEF), LeftB(0), RightB(0),
162+
inst(nullptr) {
163+
isPrecision = false;
162164
}
163165
SBFootprint(FOOTPRINT_TYPE ft, G4_Type t, unsigned short LB,
164166
unsigned short RB, G4_INST *i)
165-
: fType(ft), type(t), LeftB(LB), RightB(RB), inst(i) {
166-
;
167+
: fType(ft), type((unsigned short)t), LeftB(LB), RightB(RB), inst(i) {
168+
isPrecision = false;
169+
}
170+
// Builds a footprint whose `type` field stores a DPAS GenPrecision value
// instead of a G4_Type. isPrecision is set to true so that readers of `type`
// know which of the two enumerations it holds.
SBFootprint(FOOTPRINT_TYPE ft, GenPrecision p, unsigned short LB,
            unsigned short RB, G4_INST *i)
    // The initializers are listed in member declaration order (isPrecision is
    // declared before inst), so the list mirrors the real initialization
    // sequence and does not trigger -Wreorder.
    : fType(ft), type(static_cast<unsigned short>(p)), LeftB(LB), RightB(RB),
      isPrecision(true), inst(i) {}
168174
~SBFootprint() {}
169175

@@ -176,6 +182,7 @@ struct SBFootprint {
176182
bool hasGRFGrainedOverlap(const SBFootprint *liveFootprint) const;
177183
bool isWholeOverlap(const SBFootprint *liveFootprint) const;
178184
bool isSameOrNoOverlap(const SBFootprint *liveFootprint) const;
185+
bool hasSameType(const SBFootprint *liveFootprint) const;
179186
};
180187

181188
// Bit set which is used for global dependence analysis for SBID.

0 commit comments

Comments
 (0)