@@ -458,14 +458,13 @@ SBFootprint* G4_BB_SB::getFootprintForFlag(G4_Operand* opnd,
458
458
unsigned short LB = 0 ;
459
459
unsigned short RB = 0 ;
460
460
G4_Type type = opnd->getType ();
461
- unsigned short bitToBytes = numEltPerGRF<Type_UB>() / 16 ;
462
461
bool valid = true ;
463
462
unsigned subRegOff = opnd->getBase ()->ExSubRegNum (valid);
464
- LB = (unsigned short )(opnd->getLeftBound () + subRegOff * 16 ) * bitToBytes ;
465
- RB = (unsigned short )(opnd->getRightBound () + subRegOff * 16 ) * bitToBytes ;
463
+ LB = (unsigned short )(opnd->getLeftBound () + subRegOff * 16 ) * FLAG_TO_GRF_MAP ;
464
+ RB = (unsigned short )(opnd->getRightBound () + subRegOff * 16 ) * FLAG_TO_GRF_MAP ;
466
465
467
- LB += (builder.kernel .getNumRegTotal () + builder.kernel .getNumAcc ()) * numEltPerGRF<Type_UB>();
468
- RB += (builder.kernel .getNumRegTotal () + builder.kernel .getNumAcc ()) * numEltPerGRF<Type_UB>();
466
+ LB += (builder.kernel .getNumRegTotal () + builder.getNumScalarRegisters () + builder. kernel .getNumAcc ()) * numEltPerGRF<Type_UB>();
467
+ RB += (builder.kernel .getNumRegTotal () + builder.getNumScalarRegisters () + builder. kernel .getNumAcc ()) * numEltPerGRF<Type_UB>();
469
468
470
469
void * allocedMem = mem.alloc (sizeof (SBFootprint));
471
470
SBFootprint* footprint = nullptr ;
@@ -475,6 +474,7 @@ SBFootprint* G4_BB_SB::getFootprintForFlag(G4_Operand* opnd,
475
474
return footprint;
476
475
}
477
476
477
+
478
478
static bool compareInterval (SBNode* n1, SBNode* n2)
479
479
{
480
480
return n1->getLiveStartID () < n2->getLiveStartID ();
@@ -1256,8 +1256,8 @@ void SWSB::SWSBGenerator()
1256
1256
kernel.fg .findNaturalLoops ();
1257
1257
1258
1258
// Note that getNumFlagRegisters() treats each 16 bits as a flag register
1259
- LiveGRFBuckets LB (mem, kernel.getNumRegTotal () + kernel.getNumAcc () + fg.builder ->getNumFlagRegisters (), kernel);
1260
- LiveGRFBuckets globalSendsLB (mem, kernel.getNumRegTotal () + kernel.getNumAcc () + fg.builder ->getNumFlagRegisters (), kernel);
1259
+ LiveGRFBuckets LB (mem, kernel.getNumRegTotal () + fg. builder -> getNumScalarRegisters () + kernel.getNumAcc () + fg.builder ->getNumFlagRegisters (), kernel);
1260
+ LiveGRFBuckets globalSendsLB (mem, kernel.getNumRegTotal () + fg. builder -> getNumScalarRegisters () + kernel.getNumAcc () + fg.builder ->getNumFlagRegisters (), kernel);
1261
1261
1262
1262
SWSBDepDistanceGenerator (p, LB, globalSendsLB);
1263
1263
@@ -3451,6 +3451,7 @@ bool SWSB::insertSyncXe(G4_BB* bb, SBNode* node, G4_INST* inst, INST_LIST_ITER i
3451
3451
synInst->setDistance (inst->getDistance ());
3452
3452
synInst->setDistanceTypeXe (inst->getDistanceTypeXe ());
3453
3453
inst->setDistance (0 );
3454
+ inst->setDistanceTypeXe (G4_INST::DistanceType::DIST_NONE);
3454
3455
insertedSync = true ;
3455
3456
}
3456
3457
}
@@ -3469,6 +3470,7 @@ bool SWSB::insertSyncXe(G4_BB* bb, SBNode* node, G4_INST* inst, INST_LIST_ITER i
3469
3470
synInst->setDistance (inst->getDistance ());
3470
3471
synInst->setDistanceTypeXe (inst->getDistanceTypeXe ());
3471
3472
inst->setDistance (0 );
3473
+ inst->setDistanceTypeXe (G4_INST::DistanceType::DIST_NONE);
3472
3474
insertedSync = true ;
3473
3475
}
3474
3476
}
@@ -3488,6 +3490,7 @@ bool SWSB::insertSyncXe(G4_BB* bb, SBNode* node, G4_INST* inst, INST_LIST_ITER i
3488
3490
synInst->setDistance (inst->getDistance ());
3489
3491
synInst->setDistanceTypeXe (inst->getDistanceTypeXe ());
3490
3492
inst->setDistance (0 );
3493
+ inst->setDistanceTypeXe (G4_INST::DistanceType::DIST_NONE);
3491
3494
insertedSync = true ;
3492
3495
}
3493
3496
}
@@ -4420,6 +4423,7 @@ bool G4_BB_SB::getFootprintForOperand(SBNode* node,
4420
4423
}
4421
4424
}
4422
4425
4426
+
4423
4427
return hasDistOneAReg;
4424
4428
}
4425
4429
@@ -4466,7 +4470,8 @@ void G4_BB_SB::getGRFFootprintForIndirect(SBNode* node,
4466
4470
G4_RegVar* ptvar = NULL ;
4467
4471
int vid = 0 ;
4468
4472
4469
- while ((ptvar = p.getPointsTo (addrdcl->getRegVar (), vid++)) != NULL )
4473
+ unsigned char offset = 0 ;
4474
+ while ((ptvar = p.getPointsTo (addrdcl->getRegVar (), vid++, offset)) != NULL )
4470
4475
{
4471
4476
4472
4477
uint32_t varID = ptvar->getId ();
@@ -4496,10 +4501,13 @@ void G4_BB_SB::getGRFFootprintForIndirect(SBNode* node,
4496
4501
uint32_t regNum = var->getPhyReg ()->asGreg ()->getRegNum ();
4497
4502
uint32_t regOff = var->getPhyRegOff ();
4498
4503
4499
- linearizedStart = regNum * numEltPerGRF<Type_UB>() + regOff * TypeSize (dcl->getElemType ());
4500
- linearizedEnd = regNum * numEltPerGRF<Type_UB>() + regOff * TypeSize (dcl->getElemType ()) + dcl->getByteSize () - 1 ;
4504
+ {
4505
+ linearizedStart = regNum * numEltPerGRF<Type_UB>() + regOff * TypeSize (dcl->getElemType ());
4506
+ linearizedEnd = regNum * numEltPerGRF<Type_UB>() + regOff * TypeSize (dcl->getElemType ()) + dcl->getByteSize () - 1 ;
4507
+ }
4501
4508
}
4502
4509
4510
+
4503
4511
void * allocedMem = mem.alloc (sizeof (SBFootprint));
4504
4512
footprint = new (allocedMem)SBFootprint (GRF_T, type, (unsigned short )linearizedStart, (unsigned short )linearizedEnd, node->GetInstruction ());
4505
4513
node->setFootprint (footprint, opnd_num);
@@ -4529,11 +4537,11 @@ void G4_BB_SB::getGRFBuckets(SBNode* node,
4529
4537
continue ;
4530
4538
}
4531
4539
4532
- int aregOffset = totalGRFNum;
4533
4540
int startingBucket = curFootprint->LeftB / numEltPerGRF<Type_UB>();
4534
4541
int endingBucket = curFootprint->RightB / numEltPerGRF<Type_UB>();
4535
4542
if (curFootprint->fType == ACC_T)
4536
4543
{
4544
+ int aregOffset = totalGRFNum + builder.getNumScalarRegisters ();
4537
4545
startingBucket = startingBucket + aregOffset;
4538
4546
endingBucket = endingBucket + aregOffset;
4539
4547
}
@@ -5023,16 +5031,6 @@ bool G4_BB_SB::isLastDpas(SBNode* curNode, SBNode* nextNode)
5023
5031
return true ;
5024
5032
}
5025
5033
5026
- void G4_BB_SB::pushItemToQueue (std::vector<unsigned > *nodeIDQueue, unsigned nodeID)
5027
- {
5028
- nodeIDQueue->push_back (nodeID);
5029
-
5030
- if (nodeIDQueue->size () > SWSB_MAX_ALU_DEPENDENCE_DISTANCE_VALUE)
5031
- {
5032
- nodeIDQueue->erase (nodeIDQueue->begin ());
5033
- }
5034
- }
5035
-
5036
5034
5037
5035
void G4_BB_SB::SBDDD (G4_BB* bb,
5038
5036
LiveGRFBuckets*& LB,
@@ -5231,22 +5229,18 @@ void G4_BB_SB::SBDDD(G4_BB* bb,
5231
5229
{
5232
5230
case PIPE_INT:
5233
5231
node->setIntegerID (integerID);
5234
- pushItemToQueue (latestInstID[PIPE_INT], node->getNodeID ());
5235
5232
integerID++;
5236
5233
break ;
5237
5234
case PIPE_FLOAT:
5238
5235
node->setFloatID (floatID);
5239
- pushItemToQueue (latestInstID[PIPE_FLOAT], node->getNodeID ());
5240
5236
floatID++;
5241
5237
break ;
5242
5238
case PIPE_LONG:
5243
5239
node->setLongID (longID);
5244
- pushItemToQueue (latestInstID[PIPE_LONG], node->getNodeID ());
5245
5240
longID++;
5246
5241
break ;
5247
5242
case PIPE_MATH:
5248
5243
node->setMathID (mathID);
5249
- pushItemToQueue (latestInstID[PIPE_MATH], node->getNodeID ());
5250
5244
mathID++;
5251
5245
break ;
5252
5246
default :
@@ -5474,7 +5468,7 @@ void G4_BB_SB::SBDDD(G4_BB* bb,
5474
5468
if (distanceHonourInstruction (liveInst))
5475
5469
{
5476
5470
if (dep == RAW &&
5477
- curBucket < totalGRFNum)
5471
+ ( curBucket < ( totalGRFNum + ( int )builder. getNumScalarRegisters ())) )
5478
5472
{// Only need track GRF RAW dependence
5479
5473
LB->killOperand (bn_it);
5480
5474
setDistance (curFootprint, node, liveNode, false );
@@ -6079,21 +6073,17 @@ void SWSB::dumpTokenLiveInfo()
6079
6073
void G4_BB_SB::getLiveBucketsFromFootprint (const SBFootprint* firstFootprint, SBBucketNode* sBucketNode , LiveGRFBuckets* send_use_kills) const
6080
6074
{
6081
6075
const SBFootprint* footprint = firstFootprint;
6082
- int aregOffset = totalGRFNum;
6083
6076
6084
6077
while (footprint)
6085
6078
{
6086
6079
int startBucket = footprint->LeftB / numEltPerGRF<Type_UB>();
6087
6080
int endBucket = footprint->RightB / numEltPerGRF<Type_UB>();
6088
- if (footprint->fType == ACC_T)
6089
- {
6090
- startBucket = startBucket + aregOffset;
6091
- endBucket = endBucket + aregOffset;
6092
- }
6093
- else if (footprint->fType == FLAG_T)
6081
+
6082
+ // We only track the global dependence for GRF
6083
+ if (footprint->fType != GRF_T)
6094
6084
{
6095
- startBucket = footprint->LeftB + aregOffset + builder. kernel . getNumAcc () ;
6096
- endBucket = footprint-> RightB + aregOffset + builder. kernel . getNumAcc () ;
6085
+ footprint = footprint->next ;
6086
+ continue ;
6097
6087
}
6098
6088
6099
6089
for (int j = startBucket; j < endBucket + 1 ; j++)
0 commit comments