@@ -1171,33 +1171,25 @@ bool DDD::hasReadSuppression(G4_INST* prevInst, G4_INST* nextInst, bool multiSup
1171
1171
return suppressionSrcs > 1 ;
1172
1172
}
1173
1173
1174
- bool DDD::hasSameSourceOneDPAS (G4_INST *curInst, G4_INST *nextInst, BitSet &liveDst, BitSet &liveSrc)
1175
- {
1176
- G4_Type curTypes[4 ];
1177
- G4_Type nextTypes[4 ];
1178
1174
1179
- // Get Types
1180
- for (int i = 0 ; i < 4 ; i++)
1181
- {
1182
- curTypes[i] = Type_UNDEF;
1183
- nextTypes[i] = Type_UNDEF;
1184
- }
1185
- curTypes[0 ] = curInst->getDst ()->getType ();
1186
- nextTypes[0 ] = nextInst->getDst ()->getType ();
1187
- for (int i = 0 ; i < 3 ; i++)
1188
- {
1189
- curTypes[i + 1 ] = curInst->getSrc (i)->getType ();
1190
- nextTypes[i + 1 ] = nextInst->getSrc (i)->getType ();
1191
- }
1175
+ bool DDD::hsaSameTypesAllOperands (const G4_INST& curInst, const G4_INST& nextInst) const
1176
+ {
1177
+ assert (curInst.getNumDst () == 1 && curInst.getNumDst () == nextInst.getNumDst ());
1178
+ if (curInst.getDst ()->getType () != nextInst.getDst ()->getType ())
1179
+ return false ;
1192
1180
1193
- // Same type for all operands
1194
- for (int i = 0 ; i < 4 ; i++)
1195
- {
1196
- if (curTypes[i] != nextTypes[i])
1197
- {
1181
+ assert (curInst.getNumSrc () == nextInst.getNumSrc ());
1182
+ for (auto i = 0 ; i < curInst.getNumSrc (); ++i)
1183
+ if (curInst.getSrc (i)->getType () != nextInst.getSrc (i)->getType ())
1198
1184
return false ;
1199
- }
1200
- }
1185
+
1186
+ return true ;
1187
+ }
1188
+
1189
+ bool DDD::hasSameSourceOneDPAS (G4_INST *curInst, G4_INST *nextInst, BitSet &liveDst, BitSet &liveSrc)
1190
+ {
1191
+ if (!hsaSameTypesAllOperands (*curInst, *nextInst))
1192
+ return false ;
1201
1193
1202
1194
G4_InstDpas* curDpasInst = curInst->asDpasInst ();
1203
1195
G4_InstDpas* nextDpasInst = nextInst->asDpasInst ();
@@ -1404,9 +1396,14 @@ DDD::DDD(Mem_Manager& m, G4_BB* bb, const LatencyTable& lt, G4_Kernel* k)
1404
1396
liveSrc.clear ();
1405
1397
liveDst.clear ();
1406
1398
1407
- while (nextInst-> isDpas () &&
1408
- hasSameSourceOneDPAS (curInst, nextInst, liveDst, liveSrc ))
1399
+ // group continuous dpas in the same node if they can potentially form a dpas macro
1400
+ while ( nextInst-> isDpas ( ))
1409
1401
{
1402
+ bool canGroup = false ;
1403
+ canGroup = hasSameSourceOneDPAS (curInst, nextInst, liveDst, liveSrc);
1404
+ if (!canGroup)
1405
+ break ;
1406
+
1410
1407
// Pushed to the same node
1411
1408
node->instVec .insert (node->instVec .begin (), nextInst);
1412
1409
nodeId--;
@@ -2116,7 +2113,7 @@ struct criticalCmpForMad
2116
2113
};
2117
2114
2118
2115
// 1).The priority queue is ordered as original sequence order.
2119
- // 2).If there is a mad instruction be scheduled, trying to search the candidate which has read suppression in src1and src2.
2116
+ // 2).If a mad instruction has been scheduled, try to find a candidate that has read suppression in src1 and src2.
2120
2117
// 3).The scheduling is only applied to the BB whose instructions are mostly mad.
2121
2118
// 4).This scheduling is not for general instruction scheduling, it's controlled by option vISA_ScheduleForReadSuppression
2122
2119
uint32_t DDD::listScheduleForSuppression (G4_BB_Schedule* schedule)
0 commit comments