@@ -7071,8 +7071,8 @@ void GraphColor::OptimizeActiveRegsFootprint(std::vector<bool>& saveRegs, std::v
7071
7071
//
7072
7072
void GraphColor::addCallerSaveRestoreCode ()
7073
7073
{
7074
- // maxCallerSaveSize in Oword
7075
- unsigned int maxCallerSaveSize = builder. kernel . fg . callerSaveAreaOffset ;
7074
+
7075
+ uint32_t maxCallerSaveSize = 0 ;
7076
7076
unsigned int callerSaveNumGRF = builder.kernel .getCallerSaveLastGRF () + 1 ;
7077
7077
7078
7078
for (BB_LIST_ITER it = builder.kernel .fg .begin (); it != builder.kernel .fg .end (); ++it)
@@ -7208,10 +7208,7 @@ void GraphColor::addCallerSaveRestoreCode()
7208
7208
}
7209
7209
afterFCallBB->erase (insertRestIt);
7210
7210
7211
- // FIXME: maxCallerSaveSize in unit of OWord, here assume a register is 2 Oword
7212
- // builder.kernel.fg.paramOverflowAreaOffset = builder.kernel.fg.callerSaveAreaOffset + callerSaveRegsWritten * 2;
7213
- if (maxCallerSaveSize < (builder.kernel .fg .callerSaveAreaOffset + callerSaveRegsWritten * 2 ))
7214
- maxCallerSaveSize = (builder.kernel .fg .callerSaveAreaOffset + callerSaveRegsWritten * 2 );
7211
+ maxCallerSaveSize = std::max (maxCallerSaveSize, callerSaveRegsWritten * getGRFSize ());
7215
7212
7216
7213
if (m_options->getOption (vISA_OptReport))
7217
7214
{
@@ -7225,7 +7222,8 @@ void GraphColor::addCallerSaveRestoreCode()
7225
7222
}
7226
7223
}
7227
7224
7228
- builder.kernel .fg .paramOverflowAreaOffset = maxCallerSaveSize;
7225
+ auto byteOffset = builder.kernel .fg .callerSaveAreaOffset * 16 + maxCallerSaveSize;
7226
+ builder.kernel .fg .frameSizeInOWord = ROUND (byteOffset, 64 ) / 16 ;
7229
7227
7230
7228
builder.instList .clear ();
7231
7229
}
@@ -7235,7 +7233,7 @@ void GraphColor::addCallerSaveRestoreCode()
7235
7233
//
7236
7234
void GraphColor::addCalleeSaveRestoreCode ()
7237
7235
{
7238
- builder. kernel . fg . callerSaveAreaOffset = builder. kernel . fg . calleeSaveAreaOffset ;
7236
+
7239
7237
unsigned int callerSaveNumGRF = builder.kernel .getCallerSaveLastGRF () + 1 ;
7240
7238
unsigned int numCalleeSaveRegs = builder.kernel .getNumCalleeSaveRegs ();
7241
7239
@@ -7335,15 +7333,11 @@ void GraphColor::addCalleeSaveRestoreCode()
7335
7333
}
7336
7334
builder.kernel .fg .getUniqueReturnBlock ()->erase (eraseIt);
7337
7335
7338
- // FIXME: builder.kernel.fg.calleeSaveAreaOffset looks like in OWord, here assume
7339
- // register size is two Oword, so
7340
- // builder.kernel.fg.callerSaveAreaOffset = calleeSaveAreaOffset + calleeSaveRegsWritten * 2
7341
- builder.kernel .fg .callerSaveAreaOffset =
7342
- MAX (
7343
- builder.kernel .fg .calleeSaveAreaOffset + calleeSaveRegsWritten * 2 ,
7344
- builder.kernel .fg .callerSaveAreaOffset );
7345
7336
builder.instList .clear ();
7346
7337
7338
+ // caller-save starts after callee-save and is 64-byte aligned
7339
+ auto byteOffset = builder.kernel .fg .calleeSaveAreaOffset * 16 + calleeSaveRegsWritten * getGRFSize ();
7340
+ builder.kernel .fg .callerSaveAreaOffset = ROUND (byteOffset, 64 ) / 16 ;
7347
7341
if (m_options->getOption (vISA_OptReport))
7348
7342
{
7349
7343
std::ofstream optreport;
@@ -7360,11 +7354,11 @@ void GraphColor::addCalleeSaveRestoreCode()
7360
7354
void GraphColor::addGenxMainStackSetupCode ()
7361
7355
{
7362
7356
uint32_t fpInitVal = (uint32_t )kernel.getIntKernelAttribute (Attributes::ATTR_SpillMemOffset);
7363
- // FIXME: a potential failure here is that paramOverflowAreaOffset is already the offset based on
7357
+ // FIXME: a potential failure here is that frameSizeInOWord is already the offset based on
7364
7358
// globalScratchOffset, which is the value of fpInitVal. So below we generate code to do
7365
7359
// SP = fpInitVal + frameSize, which does not make sense. It is correct now since when there's stack call,
7366
7360
// IGC will not use scratch, so fpInitVal will be 0.
7367
- unsigned frameSize = builder.kernel .fg .paramOverflowAreaOffset + builder. kernel . fg . paramOverflowAreaSize ;
7361
+ unsigned frameSize = builder.kernel .fg .frameSizeInOWord ;
7368
7362
G4_Declare* framePtr = builder.kernel .fg .framePtrDcl ;
7369
7363
G4_Declare* stackPtr = builder.kernel .fg .stackPtrDcl ;
7370
7364
@@ -7409,7 +7403,7 @@ void GraphColor::addGenxMainStackSetupCode()
7409
7403
//
7410
7404
void GraphColor::addCalleeStackSetupCode ()
7411
7405
{
7412
- int frameSize = (int )builder.kernel .fg .paramOverflowAreaOffset /* - builder.kernel.fg.calleeSaveAreaOffset */ ;
7406
+ int frameSize = (int )builder.kernel .fg .frameSizeInOWord ;
7413
7407
G4_Declare* framePtr = builder.kernel .fg .framePtrDcl ;
7414
7408
G4_Declare* stackPtr = builder.kernel .fg .stackPtrDcl ;
7415
7409
@@ -7430,7 +7424,7 @@ void GraphColor::addCalleeStackSetupCode()
7430
7424
}
7431
7425
//
7432
7426
// BE_FP = BE_SP
7433
- // BE_SP += FrameSize (overflow-area offset + overflow-area size)
7427
+ // BE_SP += FrameSize
7434
7428
//
7435
7429
{
7436
7430
G4_DstRegRegion* dst = builder.createDst (stackPtr->getRegVar (), 0 , 0 , 1 , Type_UD);
@@ -7629,6 +7623,8 @@ void GraphColor::addFlagSaveRestoreCode()
7629
7623
7630
7624
//
7631
7625
// Add GRF caller/callee save/restore code for stack calls.
7626
+ // localSpillAreaOwordSize specifies the starting offset of the caller/callee-save area in this frame.
7627
+ // It is 64-byte aligned.
7632
7628
//
7633
7629
void GraphColor::addSaveRestoreCode (unsigned localSpillAreaOwordSize)
7634
7630
{
@@ -7645,9 +7641,6 @@ void GraphColor::addSaveRestoreCode(unsigned localSpillAreaOwordSize)
7645
7641
}
7646
7642
else
7647
7643
{
7648
- // FIXME: looks like inside addCalleeSaveRestoreCode() and addCallerSaveRestoreCode(),
7649
- // the expected offset (of calleeSaveAreaOffset and callerSaveAreaOffset) is 0-based.
7650
- // But localSpillAreaOwordSize is based on globalScratchOffset.
7651
7644
builder.kernel .fg .calleeSaveAreaOffset = localSpillAreaOwordSize;
7652
7645
addCalleeSaveRestoreCode ();
7653
7646
}
@@ -9549,7 +9542,9 @@ int GlobalRA::coloringRegAlloc()
9549
9542
9550
9543
if (hasStackCall)
9551
9544
{
9552
- unsigned localSpillAreaOwordSize = ROUND (scratchOffset, 16 ) / 16 ;
9545
+ // spill/fill intrinsics expect offset in HWord, so round up to 64 byte but maintain it in OWord unit
9546
+ // ToDo: we really need to change everything to byte for everyone's sanity..
9547
+ unsigned localSpillAreaOwordSize = ROUND (scratchOffset, 64 ) / 16 ;
9553
9548
// the given localSpillAreaOwordSize is the offset based on globalScratchOffset
9554
9549
coloring.addSaveRestoreCode (localSpillAreaOwordSize);
9555
9550
}
@@ -9662,7 +9657,7 @@ int GlobalRA::coloringRegAlloc()
9662
9657
jitInfo->isSpill = spillMemUsed > 0 ;
9663
9658
jitInfo->hasStackcalls = kernel.fg .getHasStackCalls ();
9664
9659
9665
- if (builder.kernel .fg .paramOverflowAreaOffset != 0 ) {
9660
+ if (builder.kernel .fg .frameSizeInOWord != 0 ) {
9666
9661
// jitInfo->spillMemUsed is the entire visa stack size. Consider the caller/callee
9667
9662
// save size if having caller/callee save
9668
9663
// globalScratchOffset in unit of byte, others in Oword
@@ -9676,13 +9671,8 @@ int GlobalRA::coloringRegAlloc()
9676
9671
// callerSaveAreaOffset -> ---------------------
9677
9672
// | caller save |
9678
9673
// paramOverflowAreaOffset -> ---------------------
9679
- // | paramOverflowArea |
9680
- // ---------------------
9681
- // FIXME: paramOverflowAreaOffset and paramOverflowAreaSize don't seem like be used
9682
- // anywhere, do we need them?
9683
9674
jitInfo->spillMemUsed =
9684
- (builder.kernel .fg .paramOverflowAreaOffset +
9685
- builder.kernel .fg .paramOverflowAreaSize ) * 16 - globalScratchOffset;
9675
+ builder.kernel .fg .frameSizeInOWord * 16 - globalScratchOffset;
9686
9676
9687
9677
// reserve spillMemUsed #bytes before 8kb boundary
9688
9678
kernel.getGTPinData ()->setScratchNextFree (8 *1024 - kernel.getGTPinData ()->getNumBytesScratchUse ());
0 commit comments