Skip to content

Commit 02347db

Browse files
bcheng0127 authored and igcbot committed
Update RA
Update RA
1 parent 2887a02 commit 02347db

File tree

17 files changed

+189
-125
lines changed

17 files changed

+189
-125
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4308,6 +4308,11 @@ namespace IGC
43084308
SaveOption(vISA_Compaction, false);
43094309
}
43104310

4311+
if (IGC_IS_FLAG_ENABLED(EnableGatherWithImm))
4312+
{
4313+
SaveOption(vISA_EnableGatherWithImm, true);
4314+
}
4315+
43114316
if (IGC_IS_FLAG_ENABLED(EnableGroupScheduleForBC))
43124317
{
43134318
SaveOption(vISA_EnableGroupScheduleForBC, true);

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ DECLARE_IGC_REGKEY(bool, EnableIGASWSB, false, "Use IGA for SWS
6666
DECLARE_IGC_REGKEY(bool, EnableSWSBStitch, false, "Insert dependence resolve for kernel stitching", true)
6767
DECLARE_IGC_REGKEY(bool, DisableRegDistDep, false, "distable regDist dependence", true)
6868
DECLARE_IGC_REGKEY(bool, EnableQuickTokenAlloc, false, "Insert dependence resolve for kernel stitching", true)
69+
DECLARE_IGC_REGKEY(bool, EnableGatherWithImm, false, "enable gather send with immediate", true)
6970
DECLARE_IGC_REGKEY(bool, SetA0toTdrForSendc, false, "Set A0 to tdr0 before each sendc/sendsc", true)
7071
DECLARE_IGC_REGKEY(bool, ReplaceIndirectCallWithJmpi, false, "Replace indirect call with jmpi instruction (HW WA)", true)
7172
DECLARE_IGC_REGKEY(bool, AssumeUniformIndirectCall, false, "Assume indirect call is uniform to avoid looping code", false)

visa/BuildIRImpl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ bool IR_Builder::isOpndAligned(
229229
else if (opnd->getKind() == G4_Operand::dstRegRegion &&
230230
// Only care about GRF or half-GRF alignment.
231231
(align_byte == numEltPerGRF<Type_UB>() || align_byte == numEltPerGRF<Type_UB>() / 2) &&
232-
dcl && dcl->getRegFile() == G4_ADDRESS)
232+
dcl && (dcl->getRegFile() == G4_ADDRESS))
233233
{
234234

235235
// Get the single definition of the specified operand from the use

visa/GraphColor.cpp

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2268,11 +2268,11 @@ void Interference::markInterferenceToAvoidDstSrcOverlap(G4_BB* bb,
22682268
{
22692269
// make every var in points-to set live
22702270
const REGVAR_VECTOR& pointsToSet = liveAnalysis->getPointsToAnalysis().getAllInPointsToOrIndrUse(srcRgn, bb);
2271-
for (auto var : pointsToSet)
2271+
for (auto pt : pointsToSet)
22722272
{
2273-
if (var->isRegAllocPartaker())
2273+
if (pt.var->isRegAllocPartaker())
22742274
{
2275-
unsigned srcId = var->getId();
2275+
unsigned srcId = pt.var->getId();
22762276
if (isDstRegAllocPartaker)
22772277
{
22782278
if (!varSplitCheckBeforeIntf(dstId, srcId))
@@ -2395,11 +2395,11 @@ void Interference::buildInterferenceForDst(G4_BB* bb, BitSet& live, G4_INST* ins
23952395
// add interferences to the list of potential indirect destination accesses.
23962396
//
23972397
const REGVAR_VECTOR& pointsToSet = liveAnalysis->getPointsToAnalysis().getAllInPointsToOrIndrUse(dst, bb);
2398-
for (auto var : pointsToSet)
2398+
for (auto pt : pointsToSet)
23992399
{
2400-
if (var->isRegAllocPartaker())
2400+
if (pt.var->isRegAllocPartaker())
24012401
{
2402-
buildInterferenceWithLive(live, var->getId());
2402+
buildInterferenceWithLive(live, pt.var->getId());
24032403
}
24042404
}
24052405
}
@@ -2559,11 +2559,11 @@ void Interference::buildInterferenceWithinBB(G4_BB* bb, BitSet& live)
25592559
{
25602560
// make every var in points-to set live
25612561
const REGVAR_VECTOR& pointsToSet = liveAnalysis->getPointsToAnalysis().getAllInPointsToOrIndrUse(srcRegion, bb);
2562-
for (auto var : pointsToSet)
2562+
for (auto pt : pointsToSet)
25632563
{
2564-
if (var->isRegAllocPartaker())
2564+
if (pt.var->isRegAllocPartaker())
25652565
{
2566-
updateLiveness(live, var->getId(), true);
2566+
updateLiveness(live, pt.var->getId(), true);
25672567
}
25682568
}
25692569
}
@@ -4252,9 +4252,9 @@ void Augmentation::buildLiveIntervals()
42524252
const REGVAR_VECTOR& pointsToSet = liveAnalysis.getPointsToAnalysis().getAllInPointsToOrIndrUse(srcRegion, curBB);
42534253
for (auto pointsToVar : pointsToSet)
42544254
{
4255-
if (pointsToVar->isRegAllocPartaker())
4255+
if (pointsToVar.var->isRegAllocPartaker())
42564256
{
4257-
updateEndInterval(pointsToVar->getDeclare()->getRootDeclare(), inst);
4257+
updateEndInterval(pointsToVar.var->getDeclare()->getRootDeclare(), inst);
42584258
}
42594259
}
42604260
}
@@ -6848,7 +6848,7 @@ void GlobalRA::determineSpillRegSize(unsigned& spillRegSize, unsigned& indrSpill
68486848
}
68496849
else
68506850
{
6851-
REGVAR_VECTOR indrVars;
6851+
ORG_REGVAR_VECTOR indrVars;
68526852

68536853
unsigned dstSpillRegSize = 0;
68546854
unsigned indrDstSpillRegSize = 0;
@@ -6889,13 +6889,13 @@ void GlobalRA::determineSpillRegSize(unsigned& spillRegSize, unsigned& indrSpill
68896889
auto pointsToSet = pointsToAnalysis.getAllInPointsTo(dst->getBase()->asRegVar());
68906890
if (pointsToSet != nullptr)
68916891
{
6892-
for (auto var : *pointsToSet)
6892+
for (auto pt : *pointsToSet)
68936893
{
6894-
if (var->isRegAllocPartaker() ||
6895-
((builder.getOption(vISA_HybridRAWithSpill) || builder.getOption(vISA_FastCompileRA)) && livenessCandidate(var->getDeclare())))
6894+
if (pt.var->isRegAllocPartaker() ||
6895+
((builder.getOption(vISA_HybridRAWithSpill) || builder.getOption(vISA_FastCompileRA)) && livenessCandidate(pt.var->getDeclare())))
68966896
{
6897-
indrVars.push_back(var);
6898-
indrDstSpillRegSize += var->getDeclare()->getNumRows();
6897+
indrVars.push_back(pt.var);
6898+
indrDstSpillRegSize += pt.var->getDeclare()->getNumRows();
68996899
}
69006900
}
69016901
}
@@ -6930,15 +6930,15 @@ void GlobalRA::determineSpillRegSize(unsigned& spillRegSize, unsigned& indrSpill
69306930
auto pointsToSet = pointsToAnalysis.getAllInPointsTo(src->asSrcRegRegion()->getBase()->asRegVar());
69316931
if (pointsToSet != nullptr)
69326932
{
6933-
for (auto var : *pointsToSet)
6933+
for (auto pt : *pointsToSet)
69346934
{
6935-
if (var->isRegAllocPartaker() ||
6936-
((builder.getOption(vISA_HybridRAWithSpill) || builder.getOption(vISA_FastCompileRA)) && livenessCandidate(var->getDeclare())))
6935+
if (pt.var->isRegAllocPartaker() ||
6936+
((builder.getOption(vISA_HybridRAWithSpill) || builder.getOption(vISA_FastCompileRA)) && livenessCandidate(pt.var->getDeclare())))
69376937
{
6938-
if (std::find(indrVars.begin(), indrVars.end(), var) == indrVars.end())
6938+
if (std::find(indrVars.begin(), indrVars.end(), pt.var) == indrVars.end())
69396939
{
6940-
indrVars.push_back(var);
6941-
indirSrcFillRegSize += var->getDeclare()->getNumRows();
6940+
indrVars.push_back(pt.var);
6941+
indirSrcFillRegSize += pt.var->getDeclare()->getNumRows();
69426942
}
69436943
}
69446944
}

visa/HWCaps.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,7 @@ SPDX-License-Identifier: MIT
829829

830830
bool hasFiveALUPipes() const
831831
{
832-
return false;
832+
return hasScalarRegister();
833833
}
834834

835835
bool hasSrc2ReadSupression() const

visa/LinearScanRA.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1643,9 +1643,9 @@ void LinearScanRA::calculateCurrentBBLiveIntervals(G4_BB* bb, std::vector<LSLive
16431643
if (src->asSrcRegRegion()->isIndirect())
16441644
{
16451645
auto pointsToSet = l.getPointsToAnalysis().getAllInPointsTo(src->getBase()->asRegVar());
1646-
for (auto var : *pointsToSet)
1646+
for (auto pt : *pointsToSet)
16471647
{
1648-
G4_Declare* dcl = var->getDeclare()->getRootDeclare();
1648+
G4_Declare* dcl = pt.var->getDeclare()->getRootDeclare();
16491649

16501650
setSrcReferences(bb, inst_it, i, dcl, liveIntervals, eotLiveIntervals);
16511651
}
@@ -1669,9 +1669,9 @@ void LinearScanRA::calculateCurrentBBLiveIntervals(G4_BB* bb, std::vector<LSLive
16691669
if (dst->isIndirect())
16701670
{
16711671
auto pointsToSet = l.getPointsToAnalysis().getAllInPointsTo(dst->getBase()->asRegVar());
1672-
for (auto var : *pointsToSet)
1672+
for (auto pt : *pointsToSet)
16731673
{
1674-
G4_Declare* dcl = var->getDeclare()->getRootDeclare();
1674+
G4_Declare* dcl = pt.var->getDeclare()->getRootDeclare();
16751675

16761676
setDstReferences(bb, inst_it, dcl, liveIntervals, eotLiveIntervals);
16771677
}

visa/LocalScheduler/G4_Sched.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ struct RegisterPressure
340340
gra = new GlobalRA(kernel, kernel.fg.builder->phyregpool, *p2a);
341341
// To properly track liveness for partially-written local variables.
342342
gra->markGraphBlockLocalVars();
343-
liveness = new LivenessAnalysis(*gra, G4_GRF | G4_ADDRESS | G4_INPUT | G4_FLAG);
343+
liveness = new LivenessAnalysis(*gra, G4_GRF | G4_ADDRESS | G4_INPUT | G4_FLAG | G4_SCALAR);
344344
liveness->computeLiveness();
345345
rpe = new RPE(*gra, liveness);
346346
rpe->run();

visa/LocalScheduler/SWSB_G4IR.cpp

Lines changed: 25 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -458,14 +458,13 @@ SBFootprint* G4_BB_SB::getFootprintForFlag(G4_Operand* opnd,
458458
unsigned short LB = 0;
459459
unsigned short RB = 0;
460460
G4_Type type = opnd->getType();
461-
unsigned short bitToBytes = numEltPerGRF<Type_UB>() / 16;
462461
bool valid = true;
463462
unsigned subRegOff = opnd->getBase()->ExSubRegNum(valid);
464-
LB = (unsigned short)(opnd->getLeftBound() + subRegOff * 16) * bitToBytes;
465-
RB = (unsigned short)(opnd->getRightBound() + subRegOff * 16) * bitToBytes;
463+
LB = (unsigned short)(opnd->getLeftBound() + subRegOff * 16) * FLAG_TO_GRF_MAP;
464+
RB = (unsigned short)(opnd->getRightBound() + subRegOff * 16) * FLAG_TO_GRF_MAP;
466465

467-
LB += (builder.kernel.getNumRegTotal() + builder.kernel.getNumAcc()) * numEltPerGRF<Type_UB>();
468-
RB += (builder.kernel.getNumRegTotal() + builder.kernel.getNumAcc()) * numEltPerGRF<Type_UB>();
466+
LB += (builder.kernel.getNumRegTotal() + builder.getNumScalarRegisters() + builder.kernel.getNumAcc()) * numEltPerGRF<Type_UB>();
467+
RB += (builder.kernel.getNumRegTotal() + builder.getNumScalarRegisters() + builder.kernel.getNumAcc()) * numEltPerGRF<Type_UB>();
469468

470469
void* allocedMem = mem.alloc(sizeof(SBFootprint));
471470
SBFootprint* footprint = nullptr;
@@ -475,6 +474,7 @@ SBFootprint* G4_BB_SB::getFootprintForFlag(G4_Operand* opnd,
475474
return footprint;
476475
}
477476

477+
478478
static bool compareInterval(SBNode* n1, SBNode* n2)
479479
{
480480
return n1->getLiveStartID() < n2->getLiveStartID();
@@ -1256,8 +1256,8 @@ void SWSB::SWSBGenerator()
12561256
kernel.fg.findNaturalLoops();
12571257

12581258
//Note that getNumFlagRegisters() treat each 16 bits as a flag register
1259-
LiveGRFBuckets LB(mem, kernel.getNumRegTotal() + kernel.getNumAcc() + fg.builder->getNumFlagRegisters(), kernel);
1260-
LiveGRFBuckets globalSendsLB(mem, kernel.getNumRegTotal() + kernel.getNumAcc() + fg.builder->getNumFlagRegisters(), kernel);
1259+
LiveGRFBuckets LB(mem, kernel.getNumRegTotal() + fg.builder->getNumScalarRegisters() + kernel.getNumAcc() + fg.builder->getNumFlagRegisters(), kernel);
1260+
LiveGRFBuckets globalSendsLB(mem, kernel.getNumRegTotal() + fg.builder->getNumScalarRegisters() + kernel.getNumAcc() + fg.builder->getNumFlagRegisters(), kernel);
12611261

12621262
SWSBDepDistanceGenerator(p, LB, globalSendsLB);
12631263

@@ -3451,6 +3451,7 @@ bool SWSB::insertSyncXe(G4_BB* bb, SBNode* node, G4_INST* inst, INST_LIST_ITER i
34513451
synInst->setDistance(inst->getDistance());
34523452
synInst->setDistanceTypeXe(inst->getDistanceTypeXe());
34533453
inst->setDistance(0);
3454+
inst->setDistanceTypeXe(G4_INST::DistanceType::DIST_NONE);
34543455
insertedSync = true;
34553456
}
34563457
}
@@ -3469,6 +3470,7 @@ bool SWSB::insertSyncXe(G4_BB* bb, SBNode* node, G4_INST* inst, INST_LIST_ITER i
34693470
synInst->setDistance(inst->getDistance());
34703471
synInst->setDistanceTypeXe(inst->getDistanceTypeXe());
34713472
inst->setDistance(0);
3473+
inst->setDistanceTypeXe(G4_INST::DistanceType::DIST_NONE);
34723474
insertedSync = true;
34733475
}
34743476
}
@@ -3488,6 +3490,7 @@ bool SWSB::insertSyncXe(G4_BB* bb, SBNode* node, G4_INST* inst, INST_LIST_ITER i
34883490
synInst->setDistance(inst->getDistance());
34893491
synInst->setDistanceTypeXe(inst->getDistanceTypeXe());
34903492
inst->setDistance(0);
3493+
inst->setDistanceTypeXe(G4_INST::DistanceType::DIST_NONE);
34913494
insertedSync = true;
34923495
}
34933496
}
@@ -4420,6 +4423,7 @@ bool G4_BB_SB::getFootprintForOperand(SBNode* node,
44204423
}
44214424
}
44224425

4426+
44234427
return hasDistOneAReg;
44244428
}
44254429

@@ -4466,7 +4470,8 @@ void G4_BB_SB::getGRFFootprintForIndirect(SBNode* node,
44664470
G4_RegVar* ptvar = NULL;
44674471
int vid = 0;
44684472

4469-
while ((ptvar = p.getPointsTo(addrdcl->getRegVar(), vid++)) != NULL)
4473+
unsigned char offset = 0;
4474+
while ((ptvar = p.getPointsTo(addrdcl->getRegVar(), vid++, offset)) != NULL)
44704475
{
44714476

44724477
uint32_t varID = ptvar->getId();
@@ -4496,10 +4501,13 @@ void G4_BB_SB::getGRFFootprintForIndirect(SBNode* node,
44964501
uint32_t regNum = var->getPhyReg()->asGreg()->getRegNum();
44974502
uint32_t regOff = var->getPhyRegOff();
44984503

4499-
linearizedStart = regNum * numEltPerGRF<Type_UB>() + regOff * TypeSize(dcl->getElemType());
4500-
linearizedEnd = regNum * numEltPerGRF<Type_UB>() + regOff * TypeSize(dcl->getElemType()) + dcl->getByteSize() - 1;
4504+
{
4505+
linearizedStart = regNum * numEltPerGRF<Type_UB>() + regOff * TypeSize(dcl->getElemType());
4506+
linearizedEnd = regNum * numEltPerGRF<Type_UB>() + regOff * TypeSize(dcl->getElemType()) + dcl->getByteSize() - 1;
4507+
}
45014508
}
45024509

4510+
45034511
void* allocedMem = mem.alloc(sizeof(SBFootprint));
45044512
footprint = new (allocedMem)SBFootprint(GRF_T, type, (unsigned short)linearizedStart, (unsigned short)linearizedEnd, node->GetInstruction());
45054513
node->setFootprint(footprint, opnd_num);
@@ -4529,11 +4537,11 @@ void G4_BB_SB::getGRFBuckets(SBNode* node,
45294537
continue;
45304538
}
45314539

4532-
int aregOffset = totalGRFNum;
45334540
int startingBucket = curFootprint->LeftB / numEltPerGRF<Type_UB>();
45344541
int endingBucket = curFootprint->RightB / numEltPerGRF<Type_UB>();
45354542
if (curFootprint->fType == ACC_T)
45364543
{
4544+
int aregOffset = totalGRFNum + builder.getNumScalarRegisters();
45374545
startingBucket = startingBucket + aregOffset;
45384546
endingBucket = endingBucket + aregOffset;
45394547
}
@@ -5023,16 +5031,6 @@ bool G4_BB_SB::isLastDpas(SBNode* curNode, SBNode* nextNode)
50235031
return true;
50245032
}
50255033

5026-
void G4_BB_SB::pushItemToQueue(std::vector<unsigned> *nodeIDQueue, unsigned nodeID)
5027-
{
5028-
nodeIDQueue->push_back(nodeID);
5029-
5030-
if (nodeIDQueue->size() > SWSB_MAX_ALU_DEPENDENCE_DISTANCE_VALUE)
5031-
{
5032-
nodeIDQueue->erase(nodeIDQueue->begin());
5033-
}
5034-
}
5035-
50365034

50375035
void G4_BB_SB::SBDDD(G4_BB* bb,
50385036
LiveGRFBuckets*& LB,
@@ -5231,22 +5229,18 @@ void G4_BB_SB::SBDDD(G4_BB* bb,
52315229
{
52325230
case PIPE_INT:
52335231
node->setIntegerID(integerID);
5234-
pushItemToQueue(latestInstID[PIPE_INT], node->getNodeID());
52355232
integerID++;
52365233
break;
52375234
case PIPE_FLOAT:
52385235
node->setFloatID(floatID);
5239-
pushItemToQueue(latestInstID[PIPE_FLOAT], node->getNodeID());
52405236
floatID++;
52415237
break;
52425238
case PIPE_LONG:
52435239
node->setLongID(longID);
5244-
pushItemToQueue(latestInstID[PIPE_LONG], node->getNodeID());
52455240
longID++;
52465241
break;
52475242
case PIPE_MATH:
52485243
node->setMathID(mathID);
5249-
pushItemToQueue(latestInstID[PIPE_MATH], node->getNodeID());
52505244
mathID++;
52515245
break;
52525246
default:
@@ -5474,7 +5468,7 @@ void G4_BB_SB::SBDDD(G4_BB* bb,
54745468
if (distanceHonourInstruction(liveInst))
54755469
{
54765470
if (dep == RAW &&
5477-
curBucket < totalGRFNum)
5471+
(curBucket < (totalGRFNum + (int)builder.getNumScalarRegisters())))
54785472
{//Only need track GRF RAW dependence
54795473
LB->killOperand(bn_it);
54805474
setDistance(curFootprint, node, liveNode, false);
@@ -6079,21 +6073,17 @@ void SWSB::dumpTokenLiveInfo()
60796073
void G4_BB_SB::getLiveBucketsFromFootprint(const SBFootprint* firstFootprint, SBBucketNode* sBucketNode, LiveGRFBuckets* send_use_kills) const
60806074
{
60816075
const SBFootprint* footprint = firstFootprint;
6082-
int aregOffset = totalGRFNum;
60836076

60846077
while (footprint)
60856078
{
60866079
int startBucket = footprint->LeftB / numEltPerGRF<Type_UB>();
60876080
int endBucket = footprint->RightB / numEltPerGRF<Type_UB>();
6088-
if (footprint->fType == ACC_T)
6089-
{
6090-
startBucket = startBucket + aregOffset;
6091-
endBucket = endBucket + aregOffset;
6092-
}
6093-
else if (footprint->fType == FLAG_T)
6081+
6082+
//We only track the global dependence for GRF
6083+
if (footprint->fType != GRF_T)
60946084
{
6095-
startBucket = footprint->LeftB + aregOffset + builder.kernel.getNumAcc();
6096-
endBucket = footprint->RightB + aregOffset + builder.kernel.getNumAcc();
6085+
footprint = footprint->next;
6086+
continue;
60976087
}
60986088

60996089
for (int j = startBucket; j < endBucket + 1; j++)

0 commit comments

Comments
 (0)