Skip to content

Commit ebaeab5

Browse files
pratikasharigcbot
authored and committed
This change enables spill/fill cleanup for stack-call-based spills. It
removes the restriction that callee-save-biased ranges get only callee-save partition allocation. For indirectly called functions, the patch hardwires the callee address to r125.0 to ease the caller save/restore burden on RA. Minor changes have also been made to preserve debug info links.
1 parent 45b8705 commit ebaeab5

File tree

8 files changed

+91
-26
lines changed

8 files changed

+91
-26
lines changed

visa/BuildIR.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -775,21 +775,24 @@ class IR_Builder
775775
G4_INST* createSpill(
776776
G4_DstRegRegion* dst, G4_SrcRegRegion* header, G4_SrcRegRegion* payload,
777777
G4_ExecSize execSize,
778-
uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option);
778+
uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option,
779+
bool addToInstList);
779780

780781
G4_INST* createSpill(
781782
G4_DstRegRegion* dst, G4_SrcRegRegion* payload,
782783
G4_ExecSize execSize, uint16_t numRows, uint32_t offset,
783-
G4_Declare* fp, G4_InstOption option);
784+
G4_Declare* fp, G4_InstOption option, bool addToInstList);
784785

785786

786787
G4_INST* createFill(
787788
G4_SrcRegRegion* header,
788789
G4_DstRegRegion* dstData, G4_ExecSize execSize,
789-
uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option);
790+
uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option,
791+
bool addToInstList);
790792
G4_INST* createFill(
791793
G4_DstRegRegion* dstData, G4_ExecSize execSize,
792-
uint16_t numRows, uint32_t offset, G4_Declare* fp , G4_InstOption option);
794+
uint16_t numRows, uint32_t offset, G4_Declare* fp , G4_InstOption option,
795+
bool addToInstList);
793796

794797

795798
// numberOfFlags MEANS NUMBER OF WORDS (e.g., 1 means 16-bit), not number of bits or number of data elements in operands.

visa/BuildIRImpl.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -949,10 +949,11 @@ static const unsigned int HWORD_BYTE_SIZE = 32;
949949
G4_INST* IR_Builder::createSpill(
950950
G4_DstRegRegion* dst, G4_SrcRegRegion* header, G4_SrcRegRegion* payload,
951951
G4_ExecSize execSize,
952-
uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option)
952+
uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option,
953+
bool addToInstList)
953954
{
954955
G4_INST* spill = createIntrinsicInst(nullptr, Intrinsic::Spill, execSize, dst,
955-
header, payload, nullptr, option, true);
956+
header, payload, nullptr, option, addToInstList);
956957
spill->asSpillIntrinsic()->setFP(fp);
957958
spill->asSpillIntrinsic()->setOffset((uint32_t)
958959
(((uint64_t)offset * HWORD_BYTE_SIZE) / numEltPerGRF<Type_UB>()));
@@ -963,13 +964,13 @@ G4_INST* IR_Builder::createSpill(
963964
G4_INST* IR_Builder::createSpill(
964965
G4_DstRegRegion* dst, G4_SrcRegRegion* payload,
965966
G4_ExecSize execSize, uint16_t numRows, uint32_t offset,
966-
G4_Declare* fp, G4_InstOption option)
967+
G4_Declare* fp, G4_InstOption option, bool addToInstList)
967968
{
968969
auto builtInR0 = getBuiltinR0();
969970
auto rd = getRegionStride1();
970971
auto srcRgnr0 = createSrc(builtInR0->getRegVar(), 0, 0, rd, Type_UD);
971972
G4_INST* spill = createIntrinsicInst(nullptr, Intrinsic::Spill, execSize, dst,
972-
srcRgnr0, payload, nullptr, option, true);
973+
srcRgnr0, payload, nullptr, option, addToInstList);
973974
spill->asSpillIntrinsic()->setFP(fp);
974975
spill->asSpillIntrinsic()->setOffset((uint32_t)
975976
(((uint64_t)offset * HWORD_BYTE_SIZE) / numEltPerGRF<Type_UB>()));
@@ -980,10 +981,11 @@ G4_INST* IR_Builder::createSpill(
980981
G4_INST* IR_Builder::createFill(
981982
G4_SrcRegRegion* header, G4_DstRegRegion* dstData,
982983
G4_ExecSize execSize,
983-
uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option)
984+
uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option,
985+
bool addToInstList)
984986
{
985987
G4_INST* fill = createIntrinsicInst(nullptr, Intrinsic::Fill, execSize, dstData,
986-
header, nullptr, nullptr, option, true);
988+
header, nullptr, nullptr, option, addToInstList);
987989
fill->asFillIntrinsic()->setFP(fp);
988990
fill->asFillIntrinsic()->setOffset((uint32_t)
989991
(((uint64_t)offset * HWORD_BYTE_SIZE) / numEltPerGRF<Type_UB>()));
@@ -994,13 +996,14 @@ G4_INST* IR_Builder::createFill(
994996
G4_INST* IR_Builder::createFill(
995997
G4_DstRegRegion* dstData,
996998
G4_ExecSize execSize,
997-
uint16_t numRows, uint32_t offset, G4_Declare* fp , G4_InstOption option)
999+
uint16_t numRows, uint32_t offset, G4_Declare* fp , G4_InstOption option,
1000+
bool addToInstList)
9981001
{
9991002
auto builtInR0 = getBuiltinR0();
10001003
auto rd = getRegionStride1();
10011004
auto srcRgnr0 = createSrc(builtInR0->getRegVar(), 0, 0, rd, Type_UD);
10021005
G4_INST* fill = createIntrinsicInst(nullptr, Intrinsic::Fill, execSize, dstData,
1003-
srcRgnr0, nullptr, nullptr, option, true);
1006+
srcRgnr0, nullptr, nullptr, option, addToInstList);
10041007

10051008
fill->asFillIntrinsic()->setFP(fp);
10061009
fill->asFillIntrinsic()->setOffset((uint32_t)

visa/GraphColor.cpp

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6911,7 +6911,7 @@ void GlobalRA::stackCallProlog()
69116911
G4_DstRegRegion* postDst = builder.createNullDst(Type_UD);
69126912
G4_INST* store = nullptr;
69136913
{
6914-
store = builder.createSpill(postDst, payloadSrc, G4_ExecSize(execSize), 1, 0, builder.getBESP(), InstOpt_WriteEnable);
6914+
store = builder.createSpill(postDst, payloadSrc, G4_ExecSize(execSize), 1, 0, builder.getBESP(), InstOpt_WriteEnable, false);
69156915
}
69166916
builder.setFDSpillInst(store);
69176917
G4_BB* entryBB = builder.kernel.fg.getEntryBB();
@@ -6953,7 +6953,8 @@ void GlobalRA::saveRegs(
69536953
builder.getRegionStride1(), Type_UD);
69546954
G4_DstRegRegion* dst = builder.createNullDst((execSize > 8) ? Type_UW : Type_UD);
69556955
G4_INST* spillIntrinsic = nullptr;
6956-
spillIntrinsic = builder.createSpill(dst, sendSrc2, execSize, messageLength, frameOwordOffset/2, framePtr, InstOpt_WriteEnable);
6956+
spillIntrinsic = builder.createSpill(dst, sendSrc2, execSize, messageLength, frameOwordOffset/2, framePtr, InstOpt_WriteEnable, false);
6957+
spillIntrinsic->inheritDIFrom(*insertIt);
69576958
bb->insertBefore(insertIt, spillIntrinsic);
69586959
group.insert(spillIntrinsic);
69596960
}
@@ -7036,7 +7037,8 @@ void GlobalRA::restoreRegs(
70367037
dstDcl->getRegVar()->setPhyReg(regPool.getGreg(startReg), 0);
70377038
G4_DstRegRegion* dstRgn = builder.createDst(dstDcl->getRegVar(), 0, 0, 1, (execSize > 8) ? Type_UW : Type_UD);
70387039
G4_INST* fillIntrinsic = nullptr;
7039-
fillIntrinsic = builder.createFill(dstRgn, execSize, responseLength, frameOwordOffset / 2, framePtr, InstOpt_WriteEnable);
7040+
fillIntrinsic = builder.createFill(dstRgn, execSize, responseLength, frameOwordOffset / 2, framePtr, InstOpt_WriteEnable, false);
7041+
fillIntrinsic->inheritDIFrom(*insertIt);
70407042
bb->insertBefore(insertIt, fillIntrinsic);
70417043
group.insert(fillIntrinsic);
70427044
}
@@ -7795,6 +7797,7 @@ void GlobalRA::addCallerSavePseudoCode()
77957797
G4_DstRegRegion* dst = builder.createDst(pseudoVCADcl->getRegVar(), 0, 0, 1, Type_UD);
77967798
G4_INST* saveInst = builder.createInternalIntrinsicInst(
77977799
nullptr, Intrinsic::CallerSave, g4::SIMD1, dst, nullptr, nullptr, nullptr, InstOpt_WriteEnable);
7800+
saveInst->inheritDIFrom(fcallInst);
77987801
INST_LIST_ITER callBBIt = bb->end();
77997802
bb->insertBefore(--callBBIt, saveInst);
78007803

@@ -7819,6 +7822,7 @@ void GlobalRA::addCallerSavePseudoCode()
78197822
G4_INST* restoreInst =
78207823
builder.createInternalIntrinsicInst(
78217824
nullptr, Intrinsic::CallerRestore, g4::SIMD1, nullptr, src, nullptr, nullptr, InstOpt_WriteEnable);
7825+
restoreInst->inheritDIFrom(fcallInst);
78227826
retBB->insertBefore(retBBIt, restoreInst);
78237827
}
78247828
}
@@ -9735,8 +9739,8 @@ int GlobalRA::coloringRegAlloc()
97359739

97369740
bool disableSpillCoalecse = builder.getOption(vISA_DisableSpillCoalescing) ||
97379741
builder.getOption(vISA_FastSpill) || fastCompile || builder.getOption(vISA_Debug) ||
9738-
(!useScratchMsgForSpill
9739-
);
9742+
// Spill cleanup is not supported when we use oword msg for spill/fill for non-stack calls.
9743+
(!useScratchMsgForSpill && !hasStackCall);
97409744

97419745
if (!reserveSpillReg && !disableSpillCoalecse && builder.useSends())
97429746
{
@@ -12451,6 +12455,57 @@ unsigned GraphColor::edgeWeightARF(const LiveRange* lr1, const LiveRange* lr2)
1245112455
return 0;
1245212456
}
1245312457

12458+
// Pre-assigns the call-target operand (src0) of every indirect fcall to a
// fixed physical location before register allocation runs.
//
// Walks all basic blocks of the kernel's flow graph; for each block that ends
// with an fcall whose src0 is a source register region, it either pins the
// existing src0 variable to the fcall destination's physical register, or
// copies src0 into a new hardwired temporary and rewrites the fcall to read
// from that temporary instead.
void GlobalRA::fixSrc0IndirFcall()
12459+
{
12460+
// Indirect calls look like:
12461+
// mov (1|NM) V10 0x123456:ud
12462+
// fcall (1) dst V10 <-- V10 which is src0 contains %ip to jump to
12463+
//
12464+
// In this function, we want to set V10 to r125.0 which is same as dst of fcall
12465+
// as per ABI. This way, when inserting save/restore code around fcall, no
12466+
// special checks are needed to handle V10.
12467+
//
12468+
// But this works only if V10 is a local. If it is not a local, we create a mov
12469+
// that copies V10 into a new temp variable. And then we map this temp
12470+
// variable to r125.0. Hopefully V10 being global would be a rare occurrence.
12471+
for (auto bb : kernel.fg)
12472+
{
12473+
if (bb->isEndWithFCall())
12474+
{
12475+
// The fcall is the last instruction of the block.
12476+
auto fcall = bb->back()->asCFInst();
12477+
// Nothing to fix when src0 is absent or is not a register region.
12478+
if (!fcall->getSrc(0) ||
12477+
!fcall->getSrc(0)->isSrcRegRegion())
12478+
continue;
12479+
12480+
auto src0Rgn = fcall->getSrc(0)->asSrcRegRegion();
12481+
auto src0Dcl = src0Rgn->getBase()->asRegVar()->getDeclare();
12482+
auto src0TopDcl = src0Rgn->getTopDcl();
12483+
12484+
// The variable itself can only be pinned when it is its own top
// declare, is local to this block, and holds a single element;
// otherwise fall back to copying it into a hardwired temporary.
if (src0Dcl != src0TopDcl ||
12485+
!isBlockLocal(src0TopDcl) ||
12486+
src0TopDcl->getNumElems() > 1)
12487+
{
12488+
// create a copy
// The temporary is a single-element declare hardwired to the
// FP/SP GRF at the Ret_IP sub-register.
12489+
auto tmpDcl = kernel.fg.builder->createHardwiredDeclare(1, src0Rgn->getType(), kernel.getFPSPGRF(),
12490+
IR_Builder::SubRegs_Stackcall::Ret_IP);
12491+
auto dst = kernel.fg.builder->createDst(tmpDcl->getRegVar(), src0Rgn->getType());
12492+
auto src = kernel.fg.builder->duplicateOperand(src0Rgn);
12493+
// NOTE(review): the trailing `false` is presumably the
// "append to instruction list" flag, since the mov is inserted
// manually below -- confirm against createMov's signature.
auto copy = kernel.fg.builder->createMov(g4::SIMD1, dst, src, InstOpt_WriteEnable, false);
12494+
// Locate the fcall in the block so the copy lands right before it.
auto iter = std::find_if(bb->begin(), bb->end(), [](G4_INST* inst) { return inst->isFCall(); });
12495+
bb->insertBefore(iter, copy);
12496+
// Redirect the fcall to read its target from the hardwired temporary.
auto newSrc = kernel.fg.builder->createSrc(tmpDcl->getRegVar(), 0, 0, kernel.fg.builder->getRegionScalar(),
12497+
src0Rgn->getType());
12498+
fcall->setSrc(newSrc, 0);
12499+
}
12500+
else
12501+
{
12502+
// Pin src0's variable to the same physical register and
// sub-register offset as the fcall destination.
src0TopDcl->getRegVar()->setPhyReg(fcall->getDst()->getBase()->asRegVar()->getPhyReg(),
12503+
fcall->getDst()->getBase()->asRegVar()->getPhyRegOff());
12504+
}
12505+
}
12506+
}
12507+
}
12508+
1245412509
bool dump(const char* s, LiveRange** lrs, unsigned size)
1245512510
{
1245612511
// Utility function to dump lr from name.

visa/GraphColor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,7 @@ namespace vISA
822822
void insertRestoreAddr(G4_BB*);
823823
void setIterNo(unsigned i) { iterNo = i; }
824824
unsigned getIterNo() const { return iterNo; }
825+
void fixSrc0IndirFcall();
825826

826827
G4_Declare* getRetDecl(uint32_t retLoc)
827828
{

visa/PhyRegUsage.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ bool PhyRegUsage::findContiguousGRF(bool availRegs[],
271271
findContiguousNoWrapGRF(
272272
availRegs, forbidden, occupiedBundles, align, numRegNeeded, startPosRunOne, endPosRunOne, idx);
273273

274-
if (startPosRunOne > 0 && found == false && !isEOTSrc && !isCalleeSaveBias)
274+
if (startPosRunOne > 0 && found == false && !isEOTSrc)
275275
{
276276
unsigned startPosRunTwo = 0;
277277
unsigned endPosRunTwo = startPos + numRegNeeded;

visa/RegAlloc.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3394,6 +3394,7 @@ int regAlloc(IR_Builder& builder, PhyRegPool& regPool, G4_Kernel& kernel)
33943394
if (kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc())
33953395
{
33963396
kernel.fg.addSaveRestorePseudoDeclares(builder);
3397+
gra.fixSrc0IndirFcall();
33973398
}
33983399

33993400
int status = gra.coloringRegAlloc();

visa/SpillCleanup.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ G4_SrcRegRegion* CoalesceSpillFills::generateCoalescedSpill(G4_SrcRegRegion* hea
4848
unsigned int option = useNoMask ? InstOpt_WriteEnable : 0;
4949
auto spillInst = kernel.fg.builder->createSpill(
5050
kernel.fg.builder->createNullDst(Type_UW), header, spillSrcPayload, g4::SIMD16, payloadSize,
51-
GlobalRA::GRFToHwordSize(scratchOffset), fp, static_cast<G4_InstOption>(option));
51+
GlobalRA::GRFToHwordSize(scratchOffset), fp, static_cast<G4_InstOption>(option), false);
5252

5353
if (!useNoMask)
5454
{
@@ -84,7 +84,7 @@ G4_INST* CoalesceSpillFills::generateCoalescedFill(G4_SrcRegRegion* header, unsi
8484
fp = kernel.fg.getFramePtrDcl();
8585

8686
auto fillInst = kernel.fg.builder->createFill(header, fillDst, g4::SIMD16, payloadSize,
87-
GlobalRA::GRFToHwordSize(scratchOffset), fp, InstOpt_WriteEnable);
87+
GlobalRA::GRFToHwordSize(scratchOffset), fp, InstOpt_WriteEnable, false);
8888
return fillInst;
8989
}
9090

@@ -302,7 +302,8 @@ bool CoalesceSpillFills::fillHeuristic(std::list<INST_LIST_ITER>& coalesceableFi
302302
G4_Declare* header = (*coalesceableFills.front())->asFillIntrinsic()->getHeader()->getTopDcl();
303303
for (auto f : coalesceableFills)
304304
{
305-
if ((*f)->asFillIntrinsic()->getHeader()->getTopDcl() != header)
305+
if ((*f)->asFillIntrinsic()->getHeader()->getTopDcl() != header &&
306+
!(*f)->asFillIntrinsic()->getFP())
306307
return false;
307308

308309
unsigned int scratchOffset, scratchSize;
@@ -574,7 +575,8 @@ void CoalesceSpillFills::keepConsecutiveSpills(std::list<INST_LIST_ITER>& instLi
574575
{
575576
auto inst = (*instIt);
576577

577-
if (inst->asSpillIntrinsic()->getHeader()->getTopDcl() != header)
578+
if (inst->asSpillIntrinsic()->getHeader()->getTopDcl() != header &&
579+
!inst->asSpillIntrinsic()->getFP())
578580
{
579581
return;
580582
}

visa/SpillManagerGMRF.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2372,7 +2372,7 @@ G4_INST *SpillManagerGRF::createSpillSendInstr(
23722372
headerOpnd = builder_->Create_Src_Opnd_From_Dcl(builder_->getBuiltinR0(), builder_->getRegionStride1());
23732373
}
23742374
}
2375-
sendInst = builder_->createSpill(postDst, headerOpnd, srcOpnd, execSize, height, off, fp, InstOpt_WriteEnable);
2375+
sendInst = builder_->createSpill(postDst, headerOpnd, srcOpnd, execSize, height, off, fp, InstOpt_WriteEnable, true);
23762376
sendInst->inheritDIFrom(curInst);
23772377
}
23782378
else
@@ -2435,7 +2435,7 @@ G4_INST *SpillManagerGRF::createSpillSendInstr (
24352435
}
24362436
}
24372437
sendInst = builder_->createSpill(postDst, headerOpnd, srcOpnd, spillExecSize, (uint16_t)extMsgLength,
2438-
off, fp, static_cast<G4_InstOption>(option));
2438+
off, fp, static_cast<G4_InstOption>(option), true);
24392439
sendInst->inheritDIFrom(curInst);
24402440
}
24412441
else
@@ -2618,7 +2618,7 @@ G4_INST * SpillManagerGRF::createFillSendInstr (
26182618
payload = builder_->Create_Src_Opnd_From_Dcl(builder_->getBuiltinR0(), builder_->getRegionStride1());
26192619
}
26202620
}
2621-
auto fillInst = builder_->createFill(payload, postDst, execSize, height, off, fp, InstOpt_WriteEnable);
2621+
auto fillInst = builder_->createFill(payload, postDst, execSize, height, off, fp, InstOpt_WriteEnable, true);
26222622
fillInst->inheritDIFrom(curInst);
26232623
return fillInst;
26242624

@@ -2676,7 +2676,7 @@ G4_INST * SpillManagerGRF::createFillSendInstr(
26762676
}
26772677

26782678
unsigned responseLength = cdiv(segmentByteSize, REG_BYTE_SIZE);
2679-
auto fillInst = builder_->createFill(payload, postDst, execSize, responseLength, off, fp, InstOpt_WriteEnable);
2679+
auto fillInst = builder_->createFill(payload, postDst, execSize, responseLength, off, fp, InstOpt_WriteEnable, true);
26802680
fillInst->inheritDIFrom(curInst);
26812681
return fillInst;
26822682
}

0 commit comments

Comments
 (0)