@@ -458,13 +458,14 @@ SBFootprint* G4_BB_SB::getFootprintForFlag(G4_Operand* opnd,
458
458
unsigned short LB = 0 ;
459
459
unsigned short RB = 0 ;
460
460
G4_Type type = opnd->getType ();
461
+ unsigned short bitToBytes = numEltPerGRF<Type_UB>() / 16 ;
461
462
bool valid = true ;
462
463
unsigned subRegOff = opnd->getBase ()->ExSubRegNum (valid);
463
- LB = (unsigned short )(opnd->getLeftBound () + subRegOff * 16 ) * FLAG_TO_GRF_MAP ;
464
- RB = (unsigned short )(opnd->getRightBound () + subRegOff * 16 ) * FLAG_TO_GRF_MAP ;
464
+ LB = (unsigned short )(opnd->getLeftBound () + subRegOff * 16 ) * bitToBytes ;
465
+ RB = (unsigned short )(opnd->getRightBound () + subRegOff * 16 ) * bitToBytes ;
465
466
466
- LB += (builder.kernel .getNumRegTotal () + builder.getNumScalarRegisters () + builder. kernel .getNumAcc ()) * numEltPerGRF<Type_UB>();
467
- RB += (builder.kernel .getNumRegTotal () + builder.getNumScalarRegisters () + builder. kernel .getNumAcc ()) * numEltPerGRF<Type_UB>();
467
+ LB += (builder.kernel .getNumRegTotal () + builder.kernel .getNumAcc ()) * numEltPerGRF<Type_UB>();
468
+ RB += (builder.kernel .getNumRegTotal () + builder.kernel .getNumAcc ()) * numEltPerGRF<Type_UB>();
468
469
469
470
void * allocedMem = mem.alloc (sizeof (SBFootprint));
470
471
SBFootprint* footprint = nullptr ;
@@ -474,7 +475,6 @@ SBFootprint* G4_BB_SB::getFootprintForFlag(G4_Operand* opnd,
474
475
return footprint;
475
476
}
476
477
477
-
478
478
static bool compareInterval (SBNode* n1, SBNode* n2)
479
479
{
480
480
return n1->getLiveStartID () < n2->getLiveStartID ();
@@ -1256,8 +1256,8 @@ void SWSB::SWSBGenerator()
1256
1256
kernel.fg .findNaturalLoops ();
1257
1257
1258
1258
// Note that getNumFlagRegisters() treats each 16 bits as one flag register
1259
- LiveGRFBuckets LB (mem, kernel.getNumRegTotal () + fg. builder -> getNumScalarRegisters () + kernel.getNumAcc () + fg.builder ->getNumFlagRegisters (), kernel);
1260
- LiveGRFBuckets globalSendsLB (mem, kernel.getNumRegTotal () + fg. builder -> getNumScalarRegisters () + kernel.getNumAcc () + fg.builder ->getNumFlagRegisters (), kernel);
1259
+ LiveGRFBuckets LB (mem, kernel.getNumRegTotal () + kernel.getNumAcc () + fg.builder ->getNumFlagRegisters (), kernel);
1260
+ LiveGRFBuckets globalSendsLB (mem, kernel.getNumRegTotal () + kernel.getNumAcc () + fg.builder ->getNumFlagRegisters (), kernel);
1261
1261
1262
1262
SWSBDepDistanceGenerator (p, LB, globalSendsLB);
1263
1263
@@ -3451,7 +3451,6 @@ bool SWSB::insertSyncXe(G4_BB* bb, SBNode* node, G4_INST* inst, INST_LIST_ITER i
3451
3451
synInst->setDistance (inst->getDistance ());
3452
3452
synInst->setDistanceTypeXe (inst->getDistanceTypeXe ());
3453
3453
inst->setDistance (0 );
3454
- inst->setDistanceTypeXe (G4_INST::DistanceType::DIST_NONE);
3455
3454
insertedSync = true ;
3456
3455
}
3457
3456
}
@@ -3470,7 +3469,6 @@ bool SWSB::insertSyncXe(G4_BB* bb, SBNode* node, G4_INST* inst, INST_LIST_ITER i
3470
3469
synInst->setDistance (inst->getDistance ());
3471
3470
synInst->setDistanceTypeXe (inst->getDistanceTypeXe ());
3472
3471
inst->setDistance (0 );
3473
- inst->setDistanceTypeXe (G4_INST::DistanceType::DIST_NONE);
3474
3472
insertedSync = true ;
3475
3473
}
3476
3474
}
@@ -3490,7 +3488,6 @@ bool SWSB::insertSyncXe(G4_BB* bb, SBNode* node, G4_INST* inst, INST_LIST_ITER i
3490
3488
synInst->setDistance (inst->getDistance ());
3491
3489
synInst->setDistanceTypeXe (inst->getDistanceTypeXe ());
3492
3490
inst->setDistance (0 );
3493
- inst->setDistanceTypeXe (G4_INST::DistanceType::DIST_NONE);
3494
3491
insertedSync = true ;
3495
3492
}
3496
3493
}
@@ -4423,7 +4420,6 @@ bool G4_BB_SB::getFootprintForOperand(SBNode* node,
4423
4420
}
4424
4421
}
4425
4422
4426
-
4427
4423
return hasDistOneAReg;
4428
4424
}
4429
4425
@@ -4470,8 +4466,7 @@ void G4_BB_SB::getGRFFootprintForIndirect(SBNode* node,
4470
4466
G4_RegVar* ptvar = NULL ;
4471
4467
int vid = 0 ;
4472
4468
4473
- unsigned char offset = 0 ;
4474
- while ((ptvar = p.getPointsTo (addrdcl->getRegVar (), vid++, offset)) != NULL )
4469
+ while ((ptvar = p.getPointsTo (addrdcl->getRegVar (), vid++)) != NULL )
4475
4470
{
4476
4471
4477
4472
uint32_t varID = ptvar->getId ();
@@ -4501,13 +4496,10 @@ void G4_BB_SB::getGRFFootprintForIndirect(SBNode* node,
4501
4496
uint32_t regNum = var->getPhyReg ()->asGreg ()->getRegNum ();
4502
4497
uint32_t regOff = var->getPhyRegOff ();
4503
4498
4504
- {
4505
- linearizedStart = regNum * numEltPerGRF<Type_UB>() + regOff * TypeSize (dcl->getElemType ());
4506
- linearizedEnd = regNum * numEltPerGRF<Type_UB>() + regOff * TypeSize (dcl->getElemType ()) + dcl->getByteSize () - 1 ;
4507
- }
4499
+ linearizedStart = regNum * numEltPerGRF<Type_UB>() + regOff * TypeSize (dcl->getElemType ());
4500
+ linearizedEnd = regNum * numEltPerGRF<Type_UB>() + regOff * TypeSize (dcl->getElemType ()) + dcl->getByteSize () - 1 ;
4508
4501
}
4509
4502
4510
-
4511
4503
void * allocedMem = mem.alloc (sizeof (SBFootprint));
4512
4504
footprint = new (allocedMem)SBFootprint (GRF_T, type, (unsigned short )linearizedStart, (unsigned short )linearizedEnd, node->GetInstruction ());
4513
4505
node->setFootprint (footprint, opnd_num);
@@ -4537,11 +4529,11 @@ void G4_BB_SB::getGRFBuckets(SBNode* node,
4537
4529
continue ;
4538
4530
}
4539
4531
4532
+ int aregOffset = totalGRFNum;
4540
4533
int startingBucket = curFootprint->LeftB / numEltPerGRF<Type_UB>();
4541
4534
int endingBucket = curFootprint->RightB / numEltPerGRF<Type_UB>();
4542
4535
if (curFootprint->fType == ACC_T)
4543
4536
{
4544
- int aregOffset = totalGRFNum + builder.getNumScalarRegisters ();
4545
4537
startingBucket = startingBucket + aregOffset;
4546
4538
endingBucket = endingBucket + aregOffset;
4547
4539
}
@@ -5031,6 +5023,16 @@ bool G4_BB_SB::isLastDpas(SBNode* curNode, SBNode* nextNode)
5031
5023
return true ;
5032
5024
}
5033
5025
5026
+ void G4_BB_SB::pushItemToQueue (std::vector<unsigned > *nodeIDQueue, unsigned nodeID)
5027
+ {
5028
+ nodeIDQueue->push_back (nodeID);
5029
+
5030
+ if (nodeIDQueue->size () > SWSB_MAX_ALU_DEPENDENCE_DISTANCE_VALUE)
5031
+ {
5032
+ nodeIDQueue->erase (nodeIDQueue->begin ());
5033
+ }
5034
+ }
5035
+
5034
5036
5035
5037
void G4_BB_SB::SBDDD (G4_BB* bb,
5036
5038
LiveGRFBuckets*& LB,
@@ -5229,18 +5231,22 @@ void G4_BB_SB::SBDDD(G4_BB* bb,
5229
5231
{
5230
5232
case PIPE_INT:
5231
5233
node->setIntegerID (integerID);
5234
+ pushItemToQueue (latestInstID[PIPE_INT], node->getNodeID ());
5232
5235
integerID++;
5233
5236
break ;
5234
5237
case PIPE_FLOAT:
5235
5238
node->setFloatID (floatID);
5239
+ pushItemToQueue (latestInstID[PIPE_FLOAT], node->getNodeID ());
5236
5240
floatID++;
5237
5241
break ;
5238
5242
case PIPE_LONG:
5239
5243
node->setLongID (longID);
5244
+ pushItemToQueue (latestInstID[PIPE_LONG], node->getNodeID ());
5240
5245
longID++;
5241
5246
break ;
5242
5247
case PIPE_MATH:
5243
5248
node->setMathID (mathID);
5249
+ pushItemToQueue (latestInstID[PIPE_MATH], node->getNodeID ());
5244
5250
mathID++;
5245
5251
break ;
5246
5252
default :
@@ -5468,7 +5474,7 @@ void G4_BB_SB::SBDDD(G4_BB* bb,
5468
5474
if (distanceHonourInstruction (liveInst))
5469
5475
{
5470
5476
if (dep == RAW &&
5471
- ( curBucket < ( totalGRFNum + ( int )builder. getNumScalarRegisters ())) )
5477
+ curBucket < totalGRFNum)
5472
5478
{// Only need to track GRF RAW dependence
5473
5479
LB->killOperand (bn_it);
5474
5480
setDistance (curFootprint, node, liveNode, false );
@@ -6073,17 +6079,21 @@ void SWSB::dumpTokenLiveInfo()
6073
6079
void G4_BB_SB::getLiveBucketsFromFootprint (const SBFootprint* firstFootprint, SBBucketNode* sBucketNode , LiveGRFBuckets* send_use_kills) const
6074
6080
{
6075
6081
const SBFootprint* footprint = firstFootprint;
6082
+ int aregOffset = totalGRFNum;
6076
6083
6077
6084
while (footprint)
6078
6085
{
6079
6086
int startBucket = footprint->LeftB / numEltPerGRF<Type_UB>();
6080
6087
int endBucket = footprint->RightB / numEltPerGRF<Type_UB>();
6081
-
6082
- // We only track the global dependence for GRF
6083
- if (footprint->fType != GRF_T)
6088
+ if (footprint->fType == ACC_T)
6084
6089
{
6085
- footprint = footprint->next ;
6086
- continue ;
6090
+ startBucket = startBucket + aregOffset;
6091
+ endBucket = endBucket + aregOffset;
6092
+ }
6093
+ else if (footprint->fType == FLAG_T)
6094
+ {
6095
+ startBucket = footprint->LeftB + aregOffset + builder.kernel .getNumAcc ();
6096
+ endBucket = footprint->RightB + aregOffset + builder.kernel .getNumAcc ();
6087
6097
}
6088
6098
6089
6099
for (int j = startBucket; j < endBucket + 1 ; j++)
0 commit comments