Skip to content

Commit a0bbba5

Browse files
dlei6gsys_zuul
authored and committed
Add Frame-Pointer support to stack calls:
- The stack call convention now matches the standard OS implementation instead of relying only on SP and offsets for frame calculation.
- An extra register is used to store FP info.
- Stack walking is supported by storing the previous frame's FP at the start of each frame.
Other code refactoring and bug fixes are also included.
Change-Id: I7e5137133e2e9affdf49c3d1f9310cfe2d379077
1 parent eab9601 commit a0bbba5

File tree

4 files changed

+178
-120
lines changed

4 files changed

+178
-120
lines changed

IGC/Compiler/CISACodeGen/CShader.cpp

Lines changed: 48 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ void CShader::InitEncoder(SIMDMode simdSize, bool canAbortOnSpill, ShaderDispatc
9999
m_DBG = nullptr;
100100
m_HW_TID = nullptr;
101101
m_SP = nullptr;
102-
m_SavedSP = nullptr;
102+
m_FP = nullptr;
103+
m_SavedFP = nullptr;
103104
m_ARGV = nullptr;
104105
m_RETV = nullptr;
105106

@@ -247,6 +248,9 @@ CVariable* CShader::CreateSP()
247248
// create stack-pointer register
248249
m_SP = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, true, 1, "SP");
249250
encoder.GetVISAPredefinedVar(m_SP, PREDEFINED_FE_SP);
251+
// create frame-pointer register
252+
m_FP = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, true, 1, "FP");
253+
encoder.GetVISAPredefinedVar(m_FP, PREDEFINED_FE_FP);
250254

251255
return m_SP;
252256
}
@@ -269,108 +273,26 @@ uint32_t CShader::GetMaxPrivateMem()
269273
return MaxPrivateSize;
270274
}
271275

272-
/// initial stack-pointer at the beginning of the kernel
273-
void CShader::InitKernelStack(CVariable*& stackBase, CVariable*& stackAllocSize)
276+
/// save FP of previous frame when entering a stack-call function
277+
void CShader::SaveStackState()
274278
{
275-
CreateSP();
276-
ImplicitArgs implicitArgs(*entry, m_pMdUtils);
277-
unsigned numPushArgs = m_ModuleMetadata->pushInfo.pushAnalysisWIInfos.size();
278-
unsigned numImplicitArgs = implicitArgs.size();
279-
unsigned numFuncArgs = IGCLLVM::GetFuncArgSize(entry) - numImplicitArgs - numPushArgs;
280-
281-
Argument* kerArg = nullptr;
282-
llvm::Function::arg_iterator arg = entry->arg_begin();
283-
for (unsigned i = 0; i < numFuncArgs; ++i, ++arg);
284-
for (unsigned i = 0; i < numImplicitArgs; ++i, ++arg) {
285-
ImplicitArg implicitArg = implicitArgs[i];
286-
if (implicitArg.getArgType() == ImplicitArg::ArgType::PRIVATE_BASE)
287-
{
288-
kerArg = (&*arg);
289-
break;
290-
}
291-
}
292-
IGC_ASSERT(kerArg);
293-
294-
CVariable* pHWTID = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1, "HWTID");
295-
encoder.SetSrcRegion(0, 0, 1, 0);
296-
encoder.SetSrcSubReg(0, 5);
297-
encoder.And(pHWTID, GetR0(), ImmToVariable(0x1ff, ISA_TYPE_UD));
279+
IGC_ASSERT(!m_SavedFP && m_FP && m_SP);
280+
m_SavedFP = GetNewVariable(m_FP);
281+
encoder.Copy(m_SavedFP, m_FP);
298282
encoder.Push();
299-
300-
CVariable* pSize = nullptr;
301-
302-
// Maximun private size in byte, per-workitem
303-
// When there's stack call, we don't know the actual stack size being used,
304-
// so set a conservative max stack size.
305-
uint32_t MaxPrivateSize = GetMaxPrivateMem();
306-
if (IGC_IS_FLAG_ENABLED(EnableRuntimeFuncAttributePatching))
307-
{
308-
// Experimental: Patch private memory size
309-
std::string patchName = "INTEL_PATCH_PRIVATE_MEMORY_SIZE";
310-
pSize = GetNewVariable(1, ISA_TYPE_UD, CVariable::getAlignment(getGRFSize()), true, CName(patchName));
311-
encoder.AddVISASymbol(patchName, pSize);
312-
}
313-
else
314-
{
315-
// hard-code per-workitem private-memory size to max size
316-
pSize = ImmToVariable(MaxPrivateSize * numLanes(m_dispatchSize), ISA_TYPE_UD);
317-
}
318-
319-
CVariable* pTemp = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1, CName::NONE);
320-
encoder.Mul(pTemp, pHWTID, pSize);
321-
encoder.Push();
322-
323-
// reserve space for alloca
324-
auto funcMDItr = m_ModuleMetadata->FuncMD.find(entry);
325-
if (funcMDItr != m_ModuleMetadata->FuncMD.end())
326-
{
327-
if (funcMDItr->second.privateMemoryPerWI != 0)
328-
{
329-
unsigned totalAllocaSize = funcMDItr->second.privateMemoryPerWI * numLanes(m_dispatchSize);
330-
encoder.Add(pTemp, pTemp, ImmToVariable(totalAllocaSize, ISA_TYPE_UD));
331-
encoder.Push();
332-
333-
// Set the total alloca size for the entry function
334-
encoder.SetFunctionAllocaStackSize(entry, totalAllocaSize);
335-
336-
if ((uint32_t)funcMDItr->second.privateMemoryPerWI > MaxPrivateSize)
337-
{
338-
GetContext()->EmitError("Private memory allocation exceeds max allowed size");
339-
IGC_ASSERT(0);
340-
}
341-
}
342-
}
343-
344-
if (!IGC_IS_FLAG_ENABLED(EnableRuntimeFuncAttributePatching))
345-
{
346-
// If we don't return per-function private memory size,
347-
// modify private-memory size to a large setting.
348-
// This will be reported through patch-tokens as per-kernel requirement.
349-
m_ModuleMetadata->FuncMD[entry].privateMemoryPerWI = MaxPrivateSize;
350-
}
351-
352-
stackBase = GetSymbol(kerArg);
353-
stackAllocSize = pTemp;
354283
}
355284

356-
/// save stack-pointer when entering a stack-call function
357-
void CShader::SaveSP()
285+
/// restore SP and FP when exiting a stack-call function
286+
void CShader::RestoreStackState()
358287
{
359-
IGC_ASSERT(!m_SavedSP);
360-
IGC_ASSERT(m_SP);
361-
m_SavedSP = GetNewVariable(m_SP);
362-
encoder.Copy(m_SavedSP, m_SP);
288+
IGC_ASSERT(m_SavedFP && m_FP && m_SP);
289+
// Restore SP to current FP
290+
encoder.Copy(m_SP, m_FP);
363291
encoder.Push();
364-
}
365-
366-
/// restore stack-pointer when exiting a stack-call function
367-
void CShader::RestoreSP()
368-
{
369-
IGC_ASSERT(m_SavedSP);
370-
IGC_ASSERT(m_SP);
371-
encoder.Copy(m_SP, m_SavedSP);
292+
// Restore FP to previous frame's FP
293+
encoder.Copy(m_FP, m_SavedFP);
372294
encoder.Push();
373-
m_SavedSP = nullptr;
295+
m_SavedFP = nullptr;
374296
}
375297

376298
void CShader::CreateImplicitArgs()
@@ -822,13 +744,41 @@ CVariable* CShader::GetHWTID()
822744
{
823745
if (!m_HW_TID)
824746
{
825-
m_HW_TID = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1, CName::NONE);
826-
encoder.GetVISAPredefinedVar(m_HW_TID, PREDEFINED_HW_TID);
747+
{
748+
m_HW_TID = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1, "HWTID");
749+
encoder.GetVISAPredefinedVar(m_HW_TID, PREDEFINED_HW_TID);
750+
}
827751
}
828752
return m_HW_TID;
829753
}
830754

755+
/// Finds the entry function's PRIVATE_BASE implicit argument and returns the
/// variable obtained for it via GetSymbol().
///
/// The explicit-argument count is the total argument count minus the implicit
/// and push-analysis argument counts; those explicit arguments are stepped over
/// first, then the implicit arguments are scanned in order. Asserts when no
/// PRIVATE_BASE implicit argument is present.
CVariable* CShader::GetPrivateBase()
{
    ImplicitArgs implicitArgs(*entry, m_pMdUtils);
    unsigned pushArgCount = m_ModuleMetadata->pushInfo.pushAnalysisWIInfos.size();
    unsigned implicitArgCount = implicitArgs.size();
    unsigned explicitArgCount = IGCLLVM::GetFuncArgSize(entry) - implicitArgCount - pushArgCount;

    // Step over the explicit arguments so the iterator lands on the first implicit one.
    llvm::Function::arg_iterator argIt = entry->arg_begin();
    for (unsigned skipped = 0; skipped < explicitArgCount; ++skipped)
    {
        ++argIt;
    }

    // Scan the implicit arguments for the private-memory base.
    Argument* privateBaseArg = nullptr;
    for (unsigned idx = 0; idx < implicitArgCount; ++idx, ++argIt)
    {
        ImplicitArg candidate = implicitArgs[idx];
        if (candidate.getArgType() != ImplicitArg::ArgType::PRIVATE_BASE)
        {
            continue;
        }
        privateBaseArg = &*argIt;
        break;
    }

    IGC_ASSERT(privateBaseArg);
    return GetSymbol(privateBaseArg);
}
831776

777+
/// Returns the frame-pointer variable held in m_FP.
/// Asserts that the variable has already been created (m_FP is non-null).
CVariable* CShader::GetFP()
{
    CVariable* const framePtr = m_FP;
    IGC_ASSERT(framePtr);
    return framePtr;
}
832782
CVariable* CShader::GetSP()
833783
{
834784
IGC_ASSERT(m_SP);

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 122 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
4141
#include "ShaderCodeGen.hpp"
4242
#include "common/allocator.h"
4343
#include "common/debug/Dump.hpp"
44+
#include "common/debug/Dump.hpp"
4445
#include "common/igc_regkeys.hpp"
4546
#include "common/Stats.hpp"
4647
#include "Compiler/CISACodeGen/helper.h"
@@ -450,10 +451,7 @@ bool EmitPass::runOnFunction(llvm::Function& F)
450451
if (hasStackCall)
451452
{
452453
m_encoder->InitFuncAttribute(&F, true);
453-
CVariable* pStackBase = nullptr;
454-
CVariable* pStackSize = nullptr;
455-
m_currShader->InitKernelStack(pStackBase, pStackSize);
456-
emitAddSP(m_currShader->GetSP(), pStackBase, pStackSize);
454+
InitializeKernelStack(&F);
457455
}
458456
m_currShader->AddPrologue();
459457
}
@@ -9380,6 +9378,80 @@ void EmitPass::emitReturn(llvm::ReturnInst* inst)
93809378
m_currShader->AddEpilogue(inst);
93819379
}
93829380

9381+
/// Initializes the kernel for stack call by initializing the SP and FP
9382+
void EmitPass::InitializeKernelStack(Function* pKernel)
9383+
{
9384+
m_currShader->CreateSP();
9385+
auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
9386+
auto pModuleMetadata = pCtx->getModuleMetaData();
9387+
9388+
CVariable* pStackBufferBase = m_currShader->GetPrivateBase();
9389+
9390+
CVariable* pHWTID = m_currShader->GetHWTID();
9391+
9392+
CVariable* pSize = nullptr;
9393+
9394+
// Maximun private size in byte, per-workitem
9395+
// When there's stack call, we don't know the actual stack size being used,
9396+
// so set a conservative max stack size.
9397+
uint32_t MaxPrivateSize = m_currShader->GetMaxPrivateMem();
9398+
if (IGC_IS_FLAG_ENABLED(EnableRuntimeFuncAttributePatching))
9399+
{
9400+
// Experimental: Patch private memory size
9401+
std::string patchName = "INTEL_PATCH_PRIVATE_MEMORY_SIZE";
9402+
pSize = m_currShader->GetNewVariable(1, ISA_TYPE_UD, CVariable::getAlignment(getGRFSize()), true, CName(patchName));
9403+
m_encoder->AddVISASymbol(patchName, pSize);
9404+
}
9405+
else
9406+
{
9407+
// hard-code per-workitem private-memory size to max size
9408+
pSize = m_currShader->ImmToVariable(MaxPrivateSize * numLanes(m_currShader->m_dispatchSize), ISA_TYPE_UD);
9409+
}
9410+
9411+
CVariable* pThreadOffset = m_currShader->GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1, CName::NONE);
9412+
m_encoder->Mul(pThreadOffset, pHWTID, pSize);
9413+
m_encoder->Push();
9414+
9415+
unsigned totalAllocaSize = 0;
9416+
9417+
// reserve space for kernel FP
9418+
totalAllocaSize += SIZE_OWORD;
9419+
9420+
// reserve space for alloca
9421+
auto funcMDItr = pModuleMetadata->FuncMD.find(pKernel);
9422+
if (funcMDItr != pModuleMetadata->FuncMD.end())
9423+
{
9424+
if (funcMDItr->second.privateMemoryPerWI != 0)
9425+
{
9426+
totalAllocaSize += funcMDItr->second.privateMemoryPerWI * numLanes(m_currShader->m_dispatchSize);
9427+
9428+
if ((uint32_t)funcMDItr->second.privateMemoryPerWI > MaxPrivateSize)
9429+
{
9430+
pCtx->EmitError("Private memory allocation exceeds max allowed size");
9431+
IGC_ASSERT(0);
9432+
}
9433+
}
9434+
}
9435+
9436+
// Set the total alloca size for the entry function
9437+
m_encoder->SetFunctionAllocaStackSize(pKernel, totalAllocaSize);
9438+
9439+
if (!IGC_IS_FLAG_ENABLED(EnableRuntimeFuncAttributePatching))
9440+
{
9441+
// If we don't return per-function private memory size,
9442+
// modify private-memory size to a large setting.
9443+
// This will be reported through patch-tokens as per-kernel requirement.
9444+
pModuleMetadata->FuncMD[pKernel].privateMemoryPerWI = MaxPrivateSize;
9445+
}
9446+
9447+
// Initialize SP to per-thread kernel stack base
9448+
CVariable* pSP = m_currShader->GetSP();
9449+
emitAddSP(pSP, pStackBufferBase, pThreadOffset);
9450+
9451+
// Update FP and SP
9452+
emitPushToStack(m_currShader->ImmToVariable(totalAllocaSize, ISA_TYPE_UD), true);
9453+
}
9454+
93839455
/// This function is NOT about the alignment-rule for storing argv into GRF!
93849456
/// It is about the alignment-rule when we pack the arguments into a block for stack-call!
93859457
uint EmitPass::stackCallArgumentAlignment(CVariable* argv)
@@ -9942,35 +10014,41 @@ void EmitPass::emitStackFuncEntry(Function* F)
994210014
}
994310015
}
994410016
}
9945-
// save SP before allocation
9946-
m_currShader->SaveSP();
10017+
10018+
unsigned totalAllocaSize = 0;
10019+
10020+
// reserve space to store caller's FP
10021+
totalAllocaSize += SIZE_OWORD;
994710022

994810023
// reserve space for all the alloca in the function subgroup
994910024
auto funcMDItr = m_currShader->m_ModuleMetadata->FuncMD.find(F);
995010025
if (funcMDItr != m_currShader->m_ModuleMetadata->FuncMD.end())
995110026
{
995210027
if (funcMDItr->second.privateMemoryPerWI != 0)
995310028
{
9954-
CVariable* pSP = m_currShader->GetSP();
9955-
unsigned totalAllocaSize = funcMDItr->second.privateMemoryPerWI * numLanes(m_currShader->m_dispatchSize);
9956-
emitAddSP(pSP, pSP, m_currShader->ImmToVariable(totalAllocaSize, ISA_TYPE_UD));
9957-
9958-
// Set the per-function private mem size
9959-
m_encoder->SetFunctionAllocaStackSize(F, totalAllocaSize);
9960-
10029+
totalAllocaSize += funcMDItr->second.privateMemoryPerWI * numLanes(m_currShader->m_dispatchSize);
996110030
if ((uint32_t)funcMDItr->second.privateMemoryPerWI > m_currShader->GetMaxPrivateMem())
996210031
{
996310032
m_currShader->GetContext()->EmitError("Private memory allocation exceeds max allowed size");
996410033
IGC_ASSERT(0);
996510034
}
996610035
}
996710036
}
10037+
10038+
// save FP before allocation
10039+
m_currShader->SaveStackState();
10040+
10041+
// Update SP and FP
10042+
emitPushToStack(m_currShader->ImmToVariable(totalAllocaSize, ISA_TYPE_UD), false);
10043+
10044+
// Set the per-function private mem size
10045+
m_encoder->SetFunctionAllocaStackSize(F, totalAllocaSize);
996810046
}
996910047

997010048
void EmitPass::emitStackFuncExit(llvm::ReturnInst* inst)
997110049
{
9972-
// restore SP
9973-
m_currShader->RestoreSP();
10050+
// restore SP and FP
10051+
m_currShader->RestoreStackState();
997410052

997510053
llvm::Function* F = inst->getParent()->getParent();
997610054
llvm::Type* RetTy = F->getReturnType();
@@ -15955,6 +16033,35 @@ void EmitPass::emitGenISACopy(GenIntrinsicInst* GenCopyInst)
1595516033
emitCopyAll(Dst, Src, Ty);
1595616034
}
1595716035

16036+
// Puts FP on stack, update FP to SP, then update SP by pushOffset
16037+
// If isKernel, write special FP value instead to indicate base of the stack
16038+
void EmitPass::emitPushToStack(CVariable* pushOffset, bool isKernel)
16039+
{
16040+
CVariable* pFP = m_currShader->GetFP();
16041+
CVariable* pSP = m_currShader->GetSP();
16042+
if (isKernel)
16043+
{
16044+
// Put 0 into FP to indicate kernel stack base
16045+
m_encoder->Copy(pFP, m_currShader->ImmToVariable(0, ISA_TYPE_UQ));
16046+
m_encoder->Push();
16047+
}
16048+
16049+
// Store FP value into current SP
16050+
bool is64BitAddr = (pSP->GetSize() > 4);
16051+
if (is64BitAddr)
16052+
m_encoder->OWStoreA64(pFP, pSP, SIZE_OWORD, 0);
16053+
else
16054+
m_encoder->OWStore(pFP, ESURFACE_STATELESS, nullptr, pSP, SIZE_OWORD, 0);
16055+
m_encoder->Push();
16056+
16057+
// Set FP = SP
16058+
m_encoder->Copy(pFP, pSP);
16059+
m_encoder->Push();
16060+
16061+
// Update SP by pushOffset
16062+
emitAddSP(pSP, pSP, pushOffset);
16063+
}
16064+
1595816065
void EmitPass::emitAddSP(CVariable* Dst, CVariable* Src, CVariable* offset)
1595916066
{
1596016067
if (m_currShader->m_Platform->hasNoInt64Inst() &&

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ class EmitPass : public llvm::FunctionPass
143143
void emitStackFuncEntry(llvm::Function* F);
144144
void emitStackFuncExit(llvm::ReturnInst* inst);
145145
uint stackCallArgumentAlignment(CVariable* argv);
146+
void InitializeKernelStack(llvm::Function* pKernel);
146147

147148
// emits the visa relocation instructions for function/global symbols
148149
void emitSymbolRelocation(llvm::Function& F);
@@ -419,6 +420,7 @@ class EmitPass : public llvm::FunctionPass
419420
uint32_t DstSubRegOffset = 0, uint32_t SrcSubRegOffset = 0);
420421
void emitCopyAll(CVariable* Dst, CVariable* Src, llvm::Type* Ty);
421422

423+
void emitPushToStack(CVariable* pushOffset, bool isKernel);
422424
void emitAddSP(CVariable* Dst, CVariable* Src, CVariable* offset);
423425
// emitAddPair - emulate 64-bit addition by 32-bit operations.
424426
// Dst and Src0 must be a 64-bit type variable.

0 commit comments

Comments
 (0)