Skip to content

Commit fa863ca

Browse files
weiyu-chen authored and sys_zuul committed
Align caller-save and callee-save frame offset to 64 byte.
Change-Id: I372a37c001540b8adb505d4de7489562fdabc56d
1 parent 2598bb5 commit fa863ca

File tree

3 files changed

+22
-34
lines changed

3 files changed

+22
-34
lines changed

visa/FlowGraph.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -729,8 +729,7 @@ class FlowGraph
729729
// offset in unit of OW
730730
unsigned callerSaveAreaOffset = 0;
731731
unsigned calleeSaveAreaOffset = 0;
732-
unsigned paramOverflowAreaOffset = 0;
733-
unsigned paramOverflowAreaSize = 0;
732+
unsigned frameSizeInOWord = 0;
734733

735734
// Bank conflict statistics.
736735
struct BankConflictStatistics

visa/GraphColor.cpp

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7071,8 +7071,8 @@ void GraphColor::OptimizeActiveRegsFootprint(std::vector<bool>& saveRegs, std::v
70717071
//
70727072
void GraphColor::addCallerSaveRestoreCode()
70737073
{
7074-
// maxCallerSaveSize in Oword
7075-
unsigned int maxCallerSaveSize = builder.kernel.fg.callerSaveAreaOffset;
7074+
7075+
uint32_t maxCallerSaveSize = 0;
70767076
unsigned int callerSaveNumGRF = builder.kernel.getCallerSaveLastGRF() + 1;
70777077

70787078
for (BB_LIST_ITER it = builder.kernel.fg.begin(); it != builder.kernel.fg.end(); ++it)
@@ -7208,10 +7208,7 @@ void GraphColor::addCallerSaveRestoreCode()
72087208
}
72097209
afterFCallBB->erase(insertRestIt);
72107210

7211-
// FIXME: maxCallerSaveSize in unit of OWord, here assume a register is 2 Oword
7212-
// builder.kernel.fg.paramOverflowAreaOffset = builder.kernel.fg.callerSaveAreaOffset + callerSaveRegsWritten * 2;
7213-
if (maxCallerSaveSize < (builder.kernel.fg.callerSaveAreaOffset + callerSaveRegsWritten * 2))
7214-
maxCallerSaveSize = (builder.kernel.fg.callerSaveAreaOffset + callerSaveRegsWritten * 2);
7211+
maxCallerSaveSize = std::max(maxCallerSaveSize, callerSaveRegsWritten * getGRFSize());
72157212

72167213
if (m_options->getOption(vISA_OptReport))
72177214
{
@@ -7225,7 +7222,8 @@ void GraphColor::addCallerSaveRestoreCode()
72257222
}
72267223
}
72277224

7228-
builder.kernel.fg.paramOverflowAreaOffset = maxCallerSaveSize;
7225+
auto byteOffset = builder.kernel.fg.callerSaveAreaOffset * 16 + maxCallerSaveSize;
7226+
builder.kernel.fg.frameSizeInOWord = ROUND(byteOffset, 64) / 16;
72297227

72307228
builder.instList.clear();
72317229
}
@@ -7235,7 +7233,7 @@ void GraphColor::addCallerSaveRestoreCode()
72357233
//
72367234
void GraphColor::addCalleeSaveRestoreCode()
72377235
{
7238-
builder.kernel.fg.callerSaveAreaOffset = builder.kernel.fg.calleeSaveAreaOffset;
7236+
72397237
unsigned int callerSaveNumGRF = builder.kernel.getCallerSaveLastGRF() + 1;
72407238
unsigned int numCalleeSaveRegs = builder.kernel.getNumCalleeSaveRegs();
72417239

@@ -7335,15 +7333,11 @@ void GraphColor::addCalleeSaveRestoreCode()
73357333
}
73367334
builder.kernel.fg.getUniqueReturnBlock()->erase(eraseIt);
73377335

7338-
// FIXME: builder.kernel.fg.calleeSaveAreaOffset looks like in OWord, here assume
7339-
// register size is two Oword, so
7340-
// builder.kernel.fg.callerSaveAreaOffset = calleeSaveAreaOffset + calleeSaveRegsWritten * 2
7341-
builder.kernel.fg.callerSaveAreaOffset =
7342-
MAX(
7343-
builder.kernel.fg.calleeSaveAreaOffset + calleeSaveRegsWritten * 2,
7344-
builder.kernel.fg.callerSaveAreaOffset);
73457336
builder.instList.clear();
73467337

7338+
// caller-save starts after callee-save and is 64-byte aligned
7339+
auto byteOffset = builder.kernel.fg.calleeSaveAreaOffset * 16 + calleeSaveRegsWritten * getGRFSize();
7340+
builder.kernel.fg.callerSaveAreaOffset = ROUND(byteOffset, 64) / 16;
73477341
if (m_options->getOption(vISA_OptReport))
73487342
{
73497343
std::ofstream optreport;
@@ -7360,11 +7354,11 @@ void GraphColor::addCalleeSaveRestoreCode()
73607354
void GraphColor::addGenxMainStackSetupCode()
73617355
{
73627356
uint32_t fpInitVal = (uint32_t)kernel.getIntKernelAttribute(Attributes::ATTR_SpillMemOffset);
7363-
// FIXME: a potential failure here is that paramOverflowAreaOffset is already the offset based on
7357+
// FIXME: a potential failure here is that frameSizeInOWord is already the offset based on
73647358
// GlobalScratchOffset, which is the value of fpInitVal. So below we generate code to do
73657359
// SP = fpInitVal + frameSize, which does not make sense. It is correct now since when there's stack call,
73667360
// IGC will not use scratch, so fpInitVal will be 0.
7367-
unsigned frameSize = builder.kernel.fg.paramOverflowAreaOffset + builder.kernel.fg.paramOverflowAreaSize;
7361+
unsigned frameSize = builder.kernel.fg.frameSizeInOWord;
73687362
G4_Declare* framePtr = builder.kernel.fg.framePtrDcl;
73697363
G4_Declare* stackPtr = builder.kernel.fg.stackPtrDcl;
73707364

@@ -7409,7 +7403,7 @@ void GraphColor::addGenxMainStackSetupCode()
74097403
//
74107404
void GraphColor::addCalleeStackSetupCode()
74117405
{
7412-
int frameSize = (int)builder.kernel.fg.paramOverflowAreaOffset /*- builder.kernel.fg.calleeSaveAreaOffset*/;
7406+
int frameSize = (int)builder.kernel.fg.frameSizeInOWord;
74137407
G4_Declare* framePtr = builder.kernel.fg.framePtrDcl;
74147408
G4_Declare* stackPtr = builder.kernel.fg.stackPtrDcl;
74157409

@@ -7430,7 +7424,7 @@ void GraphColor::addCalleeStackSetupCode()
74307424
}
74317425
//
74327426
// BE_FP = BE_SP
7433-
// BE_SP += FrameSize (overflow-area offset + overflow-area size)
7427+
// BE_SP += FrameSize
74347428
//
74357429
{
74367430
G4_DstRegRegion* dst = builder.createDst(stackPtr->getRegVar(), 0, 0, 1, Type_UD);
@@ -7629,6 +7623,8 @@ void GraphColor::addFlagSaveRestoreCode()
76297623

76307624
//
76317625
// Add GRF caller/callee save/restore code for stack calls.
7626+
// localSpillAreaOwordSize specifies the starting offset of the caller/callee-save area in this frame.
7627+
// It is 64-byte aligned.
76327628
//
76337629
void GraphColor::addSaveRestoreCode(unsigned localSpillAreaOwordSize)
76347630
{
@@ -7645,9 +7641,6 @@ void GraphColor::addSaveRestoreCode(unsigned localSpillAreaOwordSize)
76457641
}
76467642
else
76477643
{
7648-
// FIXME: looks like inside addCalleeSaveRestoreCode() and addCallerSaveRestoreCode(),
7649-
// the expected offset (of calleeSaveAreaOffset and callerSaveAreaOffset) is 0-based.
7650-
// But localSpillAreaOwordSize is based on globalScratchOffset.
76517644
builder.kernel.fg.calleeSaveAreaOffset = localSpillAreaOwordSize;
76527645
addCalleeSaveRestoreCode();
76537646
}
@@ -9549,7 +9542,9 @@ int GlobalRA::coloringRegAlloc()
95499542

95509543
if (hasStackCall)
95519544
{
9552-
unsigned localSpillAreaOwordSize = ROUND(scratchOffset, 16) / 16;
9545+
// spill/fill intrinsics expect offset in HWord, so round up to 64 byte but maintain it in OWord unit
9546+
// ToDo: we really need to change everything to byte for everyone's sanity..
9547+
unsigned localSpillAreaOwordSize = ROUND(scratchOffset, 64) / 16;
95539548
// the given localSpillAreaOwordSize is the offset based on globalScratchOffset
95549549
coloring.addSaveRestoreCode(localSpillAreaOwordSize);
95559550
}
@@ -9662,7 +9657,7 @@ int GlobalRA::coloringRegAlloc()
96629657
jitInfo->isSpill = spillMemUsed > 0;
96639658
jitInfo->hasStackcalls = kernel.fg.getHasStackCalls();
96649659

9665-
if (builder.kernel.fg.paramOverflowAreaOffset != 0) {
9660+
if (builder.kernel.fg.frameSizeInOWord != 0) {
96669661
// jitInfo->spillMemUsed is the entire visa stack size. Consider the caller/callee
96679662
// save size if having caller/callee save
96689663
// globalScratchOffset in unit of byte, others in Oword
@@ -9676,13 +9671,8 @@ int GlobalRA::coloringRegAlloc()
96769671
// callerSaveAreaOffset -> ---------------------
96779672
// | caller save |
96789673
// frameSizeInOWord -> ---------------------
9679-
// | paramOverflowArea |
9680-
// ---------------------
9681-
// FIXME: paramOverflowAreaOffset and paramOverflowAreaSize don't seem like be used
9682-
// anywhere, do we need them?
96839674
jitInfo->spillMemUsed =
9684-
(builder.kernel.fg.paramOverflowAreaOffset +
9685-
builder.kernel.fg.paramOverflowAreaSize) * 16 - globalScratchOffset;
9675+
builder.kernel.fg.frameSizeInOWord * 16 - globalScratchOffset;
96869676

96879677
// reserve spillMemUsed #bytes before 8kb boundary
96889678
kernel.getGTPinData()->setScratchNextFree(8*1024 - kernel.getGTPinData()->getNumBytesScratchUse());

visa/RegAlloc.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3716,8 +3716,7 @@ int regAlloc(IR_Builder& builder, PhyRegPool& regPool, G4_Kernel& kernel)
37163716
kernel.dumpDotFile("PreRegAlloc");
37173717
}
37183718

3719-
kernel.fg.callerSaveAreaOffset = kernel.fg.calleeSaveAreaOffset = kernel.fg.paramOverflowAreaOffset =
3720-
kernel.fg.paramOverflowAreaSize = 0;
3719+
kernel.fg.callerSaveAreaOffset = kernel.fg.calleeSaveAreaOffset = kernel.fg.frameSizeInOWord = 0;
37213720

37223721
// This must be done before Points-to analysis as it may modify CFG and add new BB!
37233722
if (kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc())

0 commit comments

Comments
 (0)