Skip to content

Commit 4bb0559

Browse files
pratikashar authored and igcbot committed
Implement support for implicit arguments in stack call functions.
1 parent 794f003 commit 4bb0559

21 files changed

+407
-24
lines changed

IGC/AdaptorCommon/ImplicitArgs.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,8 @@ ImplicitArgs::ImplicitArgs(const llvm::Function& func , const MetaDataUtils* pMd
310310

311311
IMPLICIT_ARGS.push_back(ImplicitArg(ImplicitArg::SYNC_BUFFER, "syncBuffer", ImplicitArg::GLOBALPTR, WIAnalysis::UNIFORM, 1, ImplicitArg::ALIGN_PTR, false));
312312

313+
IMPLICIT_ARGS.push_back(ImplicitArg(ImplicitArg::GLOBAL_STATE_BUFFER_PTR, "gblStateBufferPtr", ImplicitArg::PRIVATEPTR, WIAnalysis::UNIFORM, 1, ImplicitArg::ALIGN_PTR, true));
314+
313315
IGC_ASSERT_MESSAGE((IMPLICIT_ARGS.size() == ImplicitArg::NUM_IMPLICIT_ARGS), "Mismatch in NUM_IMPLICIT_ARGS and IMPLICIT_ARGS vector");
314316

315317
{

IGC/AdaptorCommon/ImplicitArgs.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ namespace IGC
128128

129129
SYNC_BUFFER,
130130

131+
// Side buffer ptr
132+
GLOBAL_STATE_BUFFER_PTR,
133+
131134
NUM_IMPLICIT_ARGS
132135
};
133136

IGC/Compiler/CISACodeGen/CShader.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ void CShader::InitEncoder(SIMDMode simdSize, bool canAbortOnSpill, ShaderDispatc
104104
m_SavedFP = nullptr;
105105
m_ARGV = nullptr;
106106
m_RETV = nullptr;
107+
m_GlobalStateBufPtr = nullptr;
107108

108109
// SIMD32 is a SIMD16 shader with 2 instance of each instruction
109110
m_SIMDSize = (simdSize == SIMDMode::SIMD8 ? SIMDMode::SIMD8 : SIMDMode::SIMD16);
@@ -242,6 +243,10 @@ void CShader::InitializeStackVariables()
242243
// create frame-pointer register
243244
m_FP = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, true, 1, "FP");
244245
encoder.GetVISAPredefinedVar(m_FP, PREDEFINED_FE_FP);
246+
// create side buffer ptr
247+
m_GlobalStateBufPtr = GetNewVariable(1, GetContext()->getRegisterPointerSizeInBits(ADDRESS_SPACE_GLOBAL) ? ISA_TYPE_UQ : ISA_TYPE_UD,
248+
EALIGN_QWORD, true, 1, "SideBufferPtr");
249+
encoder.GetVISAPredefinedVar(m_GlobalStateBufPtr, PREDEFINED_GLOBALBUFFERPTR);
245250
}
246251

247252
/// save FP of previous frame when entering a stack-call function
@@ -800,6 +805,36 @@ CVariable* CShader::GetRETV()
800805
return m_RETV;
801806
}
802807

808+
/// Accessor for the variable held in m_GlobalStateBufPtr.
/// Returns whatever the member currently holds, which may be nullptr.
CVariable* CShader::GetGlobalStateBufferPtr()
{
    CVariable* const globalStateBufPtr = m_GlobalStateBufPtr;
    return globalStateBufPtr;
}
812+
813+
/// Looks up the entry function's GLOBAL_STATE_BUFFER_PTR implicit argument
/// and returns the symbol (CVariable) mapped to it.
///
/// The first (arg_size - implicit count - push count) arguments of the entry
/// function are skipped; the following arguments are then matched one-to-one
/// against the entries of the ImplicitArgs list built for the entry function.
///
/// @return The CVariable for the matching kernel argument, or nullptr when
///         the entry function has no GLOBAL_STATE_BUFFER_PTR implicit arg.
CVariable* CShader::GetGlobalStateBufferInput()
{
    ImplicitArgs implicitArgs(*entry, m_pMdUtils);
    const unsigned numPushArgs = m_ModuleMetadata->pushInfo.pushAnalysisWIInfos.size();
    const unsigned numImplicitArgs = implicitArgs.size();
    const unsigned numFuncArgs = entry->arg_size() - numImplicitArgs - numPushArgs;

    // Step past the leading (non-implicit) arguments so that `arg` lines up
    // with implicitArgs[0].
    llvm::Function::arg_iterator arg = entry->arg_begin();
    for (unsigned i = 0; i < numFuncArgs; ++i)
    {
        ++arg;
    }

    // Walk the implicit arguments and the LLVM arguments in lock-step; the
    // first match is returned directly, so no post-loop null assert is needed.
    for (unsigned i = 0; i < numImplicitArgs; ++i, ++arg)
    {
        if (implicitArgs[i].getArgType() == ImplicitArg::ArgType::GLOBAL_STATE_BUFFER_PTR)
        {
            return GetSymbol(&*arg);
        }
    }

    // The entry function does not carry the implicit argument.
    return nullptr;
}
837+
803838
CEncoder& CShader::GetEncoder()
804839
{
805840
return encoder;
@@ -2064,9 +2099,11 @@ void CShader::BeginFunction(llvm::Function* F)
20642099
bool useStackCall = m_FGA && m_FGA->useStackCall(F);
20652100
if (useStackCall)
20662101
{
2067-
m_R0 = nullptr;
20682102
globalSymbolMapping.clear();
20692103
encoder.BeginStackFunction(F);
2104+
// create pre-defined r0
2105+
m_R0 = GetNewVariable(getGRFSize() / SIZE_DWORD, ISA_TYPE_D, EALIGN_GRF, false, 1, "R0");
2106+
encoder.GetVISAPredefinedVar(m_R0, PREDEFINED_R0);
20702107
}
20712108
else
20722109
{

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8315,6 +8315,12 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
83158315
case GenISAIntrinsic::GenISA_CatchAllDebugLine:
83168316
emitDebugPlaceholder(inst);
83178317
break;
8318+
case GenISAIntrinsic::GenISA_getR0:
8319+
emitR0(inst);
8320+
break;
8321+
case GenISAIntrinsic::GenISA_getGlobalStateBufferPtr:
8322+
emitGlobalStateBufferPtr(inst);
8323+
break;
83188324
case GenISAIntrinsic::GenISA_dummyInst:
83198325
emitDummyInst(inst);
83208326
break;
@@ -9794,6 +9800,14 @@ void EmitPass::InitializeKernelStack(Function* pKernel)
97949800
pSize = m_currShader->ImmToVariable(MaxPrivateSize * numLanes(m_currShader->m_dispatchSize), ISA_TYPE_UD);
97959801
}
97969802

9803+
auto pSideBufferPtr = m_currShader->GetGlobalStateBufferPtr();
9804+
auto pSideBufferInputPtr = m_currShader->GetGlobalStateBufferInput();
9805+
if (pSideBufferInputPtr)
9806+
{
9807+
m_encoder->Cast(pSideBufferPtr, pSideBufferInputPtr);
9808+
m_encoder->Push();
9809+
}
9810+
97979811
CVariable* pThreadOffset = m_currShader->GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1, CName::NONE);
97989812
m_encoder->Mul(pThreadOffset, pHWTID, pSize);
97999813
m_encoder->Push();
@@ -17218,6 +17232,23 @@ void EmitPass::emitDummyInst(llvm::GenIntrinsicInst* GII)
1721817232
m_encoder->Push();
1721917233
}
1722017234

17235+
/// Emits code for the GenISA_getR0 intrinsic: copies the current shader's R0
/// variable into the symbol associated with the intrinsic call.
/// Nothing is emitted when the intrinsic's result has no uses.
void EmitPass::emitR0(llvm::GenIntrinsicInst* I)
{
    if (I->getNumUses() != 0)
    {
        // The copy width is one GRF expressed in DWORD lanes, with no
        // execution mask applied.
        const auto dwordLanesPerGRF = m_currShader->getGRFSize() / SIZE_DWORD;
        m_encoder->SetUniformSIMDSize(lanesToSIMDMode(dwordLanesPerGRF));
        m_encoder->SetNoMask();

        m_currShader->CopyVariable(
            GetSymbol(I),
            m_currShader->GetR0());
    }
}
17243+
17244+
/// Emits code for the GenISA_getGlobalStateBufferPtr intrinsic: copies the
/// shader's global state buffer pointer variable into the symbol associated
/// with the intrinsic call. Nothing is emitted when the result has no uses.
void EmitPass::emitGlobalStateBufferPtr(llvm::GenIntrinsicInst* I)
{
    if (I->getNumUses() != 0)
    {
        // The copy is issued with no execution mask applied.
        m_encoder->SetNoMask();

        m_currShader->CopyVariable(
            GetSymbol(I),
            m_currShader->GetGlobalStateBufferPtr());
    }
}
17251+
1722117252

1722217253

1722317254

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,8 @@ class EmitPass : public llvm::FunctionPass
451451
void emitLifetimeStartAtEndOfBB(llvm::BasicBlock* BB);
452452
void emitDebugPlaceholder(llvm::GenIntrinsicInst* I);
453453
void emitDummyInst(llvm::GenIntrinsicInst* GII);
454+
void emitR0(llvm::GenIntrinsicInst* I);
455+
void emitGlobalStateBufferPtr(llvm::GenIntrinsicInst* I);
454456

455457
std::pair<llvm::Value*, llvm::Value*> getPairOutput(llvm::Value*) const;
456458

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,7 @@ namespace IGC
10261026
}
10271027
break;
10281028

1029+
//case KernelArg::ArgType::IMPLICIT_GLOBAL_STATE_BUFFER_PTR:
10291030
case KernelArg::ArgType::IMPLICIT_CONSTANT_BASE:
10301031
{
10311032
int argNo = kernelArg->getAssociatedArgNo();

IGC/Compiler/CISACodeGen/ShaderCodeGen.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,8 @@ class CShader
193193
CVariable* GetARGV();
194194
CVariable* GetRETV();
195195
CVariable* GetPrivateBase();
196+
CVariable* GetGlobalStateBufferPtr();
197+
CVariable* GetGlobalStateBufferInput();
196198

197199
bool hasSP() const { return m_SP != nullptr; }
198200
bool hasFP() const { return m_FP != nullptr; }
@@ -543,6 +545,7 @@ class CShader
543545
CVariable* m_SavedFP;
544546
CVariable* m_ARGV;
545547
CVariable* m_RETV;
548+
CVariable* m_GlobalStateBufPtr;
546549

547550
std::vector<USC::SConstantGatherEntry> gatherMap;
548551
uint m_ConstantBufferLength;

IGC/Compiler/CISACodeGen/WIAnalysis.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1161,7 +1161,9 @@ WIAnalysis::WIDependancy WIAnalysisRunner::calculate_dep(const CallInst* inst)
11611161
GII_id == GenISAIntrinsic::GenISA_eu_id ||
11621162
GII_id == GenISAIntrinsic::GenISA_eu_thread_id ||
11631163
GII_id == GenISAIntrinsic::GenISA_hw_thread_id ||
1164-
GII_id == GenISAIntrinsic::GenISA_hw_thread_id_alloca)
1164+
GII_id == GenISAIntrinsic::GenISA_hw_thread_id_alloca ||
1165+
GII_id == GenISAIntrinsic::GenISA_getR0 ||
1166+
GII_id == GenISAIntrinsic::GenISA_getGlobalStateBufferPtr)
11651167
{
11661168
if (intrinsic_name == llvm_input ||
11671169
intrinsic_name == llvm_shaderinputvec)
@@ -1238,6 +1240,12 @@ WIAnalysis::WIDependancy WIAnalysisRunner::calculate_dep(const CallInst* inst)
12381240
}
12391241
}
12401242

1243+
if(GII_id == GenISAIntrinsic::GenISA_getR0 ||
1244+
GII_id == GenISAIntrinsic::GenISA_getGlobalStateBufferPtr)
1245+
{
1246+
return WIAnalysis::UNIFORM;
1247+
}
1248+
12411249
// Iterate over all input dependencies. If all are uniform - propagate it.
12421250
// otherwise - return RANDOM
12431251
unsigned numParams = inst->getNumArgOperands();

IGC/Compiler/DebugInfo/ScalarVISAModule.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,8 @@ ScalarVisaModule::GetVariableLocation(const llvm::Instruction* pInst) const
377377
&& modMD->FuncMD.find(const_cast<Function*>(curFunc)) != modMD->FuncMD.end())
378378
{
379379
unsigned int explicitArgsNum = curFunc->arg_size() - itr->second->size_ImplicitArgInfoList();
380-
if (pArgument->getArgNo() < explicitArgsNum)
380+
if (pArgument->getArgNo() < explicitArgsNum &&
381+
modMD->FuncMD[const_cast<Function*>(curFunc)].m_OpenCLArgBaseTypes.size() > pArgument->getArgNo())
381382
{
382383
const std::string typeStr = modMD->FuncMD[const_cast<Function*>(curFunc)].m_OpenCLArgBaseTypes[pArgument->getArgNo()];
383384
KernelArg::ArgType argType = KernelArg::calcArgType(pArgument, typeStr);

IGC/Compiler/Optimizer/OpenCLPasses/KernelArgs.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,8 @@ KernelArg::ArgType KernelArg::calcArgType(const ImplicitArg& arg) const
238238
{
239239
case ImplicitArg::R0:
240240
return KernelArg::ArgType::IMPLICIT_R0;
241+
case ImplicitArg::GLOBAL_STATE_BUFFER_PTR:
242+
return KernelArg::ArgType::IMPLICIT_GLOBAL_STATE_BUFFER_PTR;
241243
case ImplicitArg::PAYLOAD_HEADER:
242244
return KernelArg::ArgType::IMPLICIT_PAYLOAD_HEADER;
243245
case ImplicitArg::PRIVATE_BASE:
@@ -787,6 +789,8 @@ KernelArgsOrder::KernelArgsOrder(InputType layout)
787789
KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_SIZE,
788790
KernelArg::ArgType::IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE,
789791

792+
KernelArg::ArgType::IMPLICIT_GLOBAL_STATE_BUFFER_PTR,
793+
790794
KernelArg::ArgType::IMPLICIT_IMAGE_HEIGHT,
791795
KernelArg::ArgType::IMPLICIT_IMAGE_WIDTH,
792796
KernelArg::ArgType::IMPLICIT_IMAGE_DEPTH,
@@ -882,6 +886,7 @@ KernelArgsOrder::KernelArgsOrder(InputType layout)
882886
KernelArg::ArgType::RUNTIME_VALUE,
883887

884888
KernelArg::ArgType::IMPLICIT_PAYLOAD_HEADER,
889+
885890
KernelArg::ArgType::PTR_LOCAL,
886891
KernelArg::ArgType::PTR_GLOBAL,
887892
KernelArg::ArgType::PTR_CONSTANT,
@@ -902,6 +907,8 @@ KernelArgsOrder::KernelArgsOrder(InputType layout)
902907
KernelArg::ArgType::IMPLICIT_STAGE_IN_GRID_SIZE,
903908
KernelArg::ArgType::IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE,
904909

910+
KernelArg::ArgType::IMPLICIT_GLOBAL_STATE_BUFFER_PTR,
911+
905912
KernelArg::ArgType::IMPLICIT_IMAGE_HEIGHT,
906913
KernelArg::ArgType::IMPLICIT_IMAGE_WIDTH,
907914
KernelArg::ArgType::IMPLICIT_IMAGE_DEPTH,

IGC/Compiler/Optimizer/OpenCLPasses/KernelArgs.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ namespace IGC
8585
IMPLICIT_LOCAL_SIZE,
8686
IMPLICIT_ENQUEUED_LOCAL_WORK_SIZE,
8787

88+
IMPLICIT_GLOBAL_STATE_BUFFER_PTR,
89+
8890
IMPLICIT_IMAGE_HEIGHT,
8991
IMPLICIT_IMAGE_WIDTH,
9092
IMPLICIT_IMAGE_DEPTH,

0 commit comments

Comments
 (0)