Skip to content

Commit 0b10cfb

Browse files
jaladreips authored and igcbot committed
Internal changes for stack calls in RTX
Internal changes for stack calls in RTX
1 parent 3bab41f commit 0b10cfb

16 files changed

+169
-12
lines changed

IGC/AdaptorCommon/ProcessFuncAttributes.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1039,7 +1039,7 @@ bool InsertDummyKernelForSymbolTable::runOnModule(Module& M)
10391039

10401040
// Check when we need to generate a dummy kernel. This is only useful for attaching
10411041
// the symbol table to its program output for indirect calls and global variable relocation.
1042-
if (IGC_IS_FLAG_ENABLED(EnableFunctionPointer) && pCtx->type == ShaderType::OPENCL_SHADER)
1042+
if (IGC_IS_FLAG_ENABLED(EnableFunctionPointer))
10431043
{
10441044
if (pCtx->m_enableFunctionPointer) {
10451045
// Symbols are needed for external functions and function pointers

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6513,6 +6513,8 @@ namespace IGC
65136513

65146514
pOutput->m_numGRFTotal = jitInfo->stats.numGRFTotal;
65156515
pOutput->m_numThreads = jitInfo->stats.numThreads;
6516+
6517+
pOutput->m_perThreadArgumentStackSize = m_argumentStackSize;
65166518
}
65176519

65186520
uint32_t CEncoder::getSpillMemSizeWithFG(const llvm::Function &curFunc,

IGC/Compiler/CISACodeGen/CISABuilder.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,7 @@ namespace IGC
782782
VISAKernel* vKernelTmp;
783783
bool m_hasPrevKernel = false;
784784
unsigned int m_payloadEnd = 0;
785+
unsigned int m_argumentStackSize = 0;
785786

786787
bool m_isCodePatchCandidate = false;
787788

@@ -838,7 +839,8 @@ namespace IGC
838839
}
839840
// Raises the recorded argument stack size of function F to at least `size`.
// Does nothing when F has no entry in funcAttributeMap.
// This diff hunk shows both the removed ('-') and the added ('+') statement;
// the added one additionally mirrors the per-function result into
// m_argumentStackSize.
// NOTE(review): m_argumentStackSize is overwritten with F's maximum on every
// call, so it holds the value of the most recently updated function rather
// than a maximum across all functions - confirm this is intended.
void SetFunctionMaxArgumentStackSize(llvm::Function* F, unsigned size) {
840841
if (funcAttributeMap.find(F) != funcAttributeMap.end())
841-
funcAttributeMap[F].argumentStackSize = MAX(funcAttributeMap[F].argumentStackSize, size);
842+
m_argumentStackSize = funcAttributeMap[F].argumentStackSize = MAX(funcAttributeMap[F].argumentStackSize, size);
843+
842844
}
843845
void SetFunctionAllocaStackSize(llvm::Function* F, unsigned size) {
844846
if (funcAttributeMap.find(F) != funcAttributeMap.end())

IGC/Compiler/CISACodeGen/CShader.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,20 @@ void CShader::InitializeStackVariables()
318318
m_LocalIdBufPtr = GetNewVariable(1, ISA_TYPE_UQ, EALIGN_QWORD, true, 1, "LocalIdPtr");
319319
encoder.GetVISAPredefinedVar(m_LocalIdBufPtr, PREDEFINED_LOCAL_ID_BUF_PTR);
320320
}
321+
322+
auto& argRegisterReservations = m_ctx->getModuleMetaData()->argRegisterReservations;
323+
m_ARGVReservedVariablesTotalSize = 0;
324+
325+
for (int i = 0; i < ARG_SPACE_RESERVATION_SLOTS::NUM_ARG_SPACE_RESERVATION_SLOTS; i++)
326+
{
327+
uint32_t reservationSize = argRegisterReservations[i];
328+
if (reservationSize)
329+
{
330+
auto aligned_offset = iSTD::Align(m_ARGVReservedVariablesTotalSize, reservationSize);
331+
m_ARGVReservedVariables[i] = GetNewAlias(GetARGV(), ISA_TYPE_W, aligned_offset, reservationSize, true);
332+
m_ARGVReservedVariablesTotalSize = aligned_offset + reservationSize;
333+
}
334+
}
321335
}
322336

323337
/// save FP of previous frame when entering a stack-call function
@@ -343,7 +357,11 @@ void CShader::RestoreStackState()
343357
// Restore FP to previous frame's FP
344358
encoder.Copy(m_FP, m_SavedFP);
345359
encoder.Push();
360+
361+
// Reset temp variables
346362
m_SavedFP = nullptr;
363+
for (auto& arg : m_ARGVReservedVariables)
364+
arg = nullptr;
347365
}
348366

349367
void CShader::CreateImplicitArgs()
@@ -1002,6 +1020,17 @@ CVariable* CShader::GetPrevFP()
10021020
{
10031021
return m_SavedFP;
10041022
}
1023+
1024+
// Returns the ARGV alias stored for the given reservation slot.
// Entries are only filled in by InitializeStackVariables() for slots whose
// reservation size in module metadata is non-zero, and are reset to nullptr
// by RestoreStackState(), so the returned pointer may be null.
// The slot value is used directly as an array index with no bounds check.
CVariable* CShader::GetARGVReservedVariable(ARG_SPACE_RESERVATION_SLOTS slot)
1025+
{
1026+
return m_ARGVReservedVariables[slot];
1027+
}
1028+
1029+
// Returns the running total accumulated by InitializeStackVariables() while
// laying out the reserved ARGV slots (last aligned offset + reservation size).
// emitStackCall() and emitStackFuncEntry() add this value to their starting
// ARGV offset.
uint32_t CShader::GetARGVReservedVariablesTotalSize()
1030+
{
1031+
return m_ARGVReservedVariablesTotalSize;
1032+
}
1033+
10051034
CVariable* CShader::GetSP()
10061035
{
10071036
IGC_ASSERT(m_SP);

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8661,9 +8661,18 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
86618661
case GenISAIntrinsic::GenISA_staticConstantPatchValue:
86628662
emitStaticConstantPatchValue(cast<StaticConstantPatchIntrinsic>(inst));
86638663
break;
8664+
case GenISAIntrinsic::GenISA_SetStackCallsBaseAddress:
8665+
emitSetStackCallsBaseAddress(inst);
8666+
break;
86648667
case GenISAIntrinsic::GenISA_SetImplicitBufferPtr:
86658668
emitStoreImplBufferPtr(inst);
86668669
break;
8670+
case GenISAIntrinsic::GenISA_SaveInReservedArgSpace:
8671+
emitSaveInReservedArgSpace(cast<SaveInReservedArgSpaceIntrinsic>(inst));
8672+
break;
8673+
case GenISAIntrinsic::GenISA_ReadFromReservedArgSpace:
8674+
emitReadFromReservedArgSpace(cast<ReadFromReservedArgSpaceIntrinsic>(inst));
8675+
break;
86678676
case GenISAIntrinsic::GenISA_SetLocalIdBufferPtr:
86688677
emitStoreLocalIdBufferPtr(inst);
86698678
break;
@@ -10521,7 +10530,7 @@ void EmitPass::emitStackAlloca(GenIntrinsicInst* GII)
1052110530
{
1052210531
// Static private mem access is done through the FP
1052310532
CVariable* pFP = m_currShader->GetFP();
10524-
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
10533+
if (IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack) && m_pCtx->type != ShaderType::RAYTRACING_SHADER)
1052510534
{
1052610535
// If we have written the previous FP to the current frame's start, the start of
1052710536
// private memory will be offset.
@@ -10611,13 +10620,14 @@ void EmitPass::emitReturn(llvm::ReturnInst* inst)
1061110620
}
1061210621

1061310622
/// Initializes the kernel for stack call by initializing the SP and FP
10614-
void EmitPass::InitializeKernelStack(Function* pKernel)
10623+
void EmitPass::InitializeKernelStack(Function* pKernel, CVariable* stackBufferBase)
1061510624
{
1061610625
m_currShader->InitializeStackVariables();
1061710626
auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
1061810627
auto pModMD = pCtx->getModuleMetaData();
1061910628

10620-
CVariable* pStackBufferBase = m_currShader->GetPrivateBase();
10629+
auto* pStackBufferBase = stackBufferBase ? stackBufferBase : m_currShader->GetPrivateBase();
10630+
1062110631
CVariable* pHWTID = m_currShader->GetHWTID();
1062210632

1062310633
IGC_ASSERT(pModMD->FuncMD.find(pKernel) != pModMD->FuncMD.end());
@@ -10968,6 +10978,9 @@ void EmitPass::emitStackCall(llvm::CallInst* inst)
1096810978
IGC_ASSERT(!m_encoder->IsSecondHalf());
1096910979
bool hasSecondHalf = (m_currShader->m_numberInstance == 2) && (m_currShader->m_dispatchSize == SIMDMode::SIMD32);
1097010980

10981+
offsetA += m_currShader->GetARGVReservedVariablesTotalSize();
10982+
IGC_ASSERT(offsetA < ArgBlkVar->GetSize());
10983+
1097110984
for (uint32_t i = 0; i < IGCLLVM::getNumArgOperands(inst); i++)
1097210985
{
1097310986
Value* operand = inst->getArgOperand(i);
@@ -11221,6 +11234,9 @@ void EmitPass::emitStackFuncEntry(Function* F)
1122111234
uint32_t offsetS = 0; // visa stack offset
1122211235
std::vector<CVariable*> argsOnStack;
1122311236

11237+
offsetA += m_currShader->GetARGVReservedVariablesTotalSize();
11238+
IGC_ASSERT(offsetA < ArgBlkVar->GetSize());
11239+
1122411240
IGC_ASSERT(!m_encoder->IsSecondHalf());
1122511241
bool hasSecondHalf = (m_currShader->m_numberInstance == 2) && (m_currShader->m_dispatchSize == SIMDMode::SIMD32);
1122611242
SmallVector<std::tuple<CVariable*, CVariable*, Type*>, 8> StructShuffleVector;
@@ -18869,7 +18885,7 @@ void EmitPass::emitPushFrameToStack(unsigned& pushSize)
1886918885
// Update SP by pushSize
1887018886
emitAddPointer(pSP, pSP, m_currShader->ImmToVariable(pushSize, ISA_TYPE_UD));
1887118887

18872-
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
18888+
if (IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack) && m_pCtx->type != ShaderType::RAYTRACING_SHADER) // RTX provides own stack memory, storing the FP at this point is invalid
1887318889
{
1887418890
// Store old FP value to current FP
1887518891
CVariable* pOldFP = m_currShader->GetPrevFP();
@@ -20432,6 +20448,35 @@ void EmitPass::emitStoreImplBufferPtr(llvm::GenIntrinsicInst* I)
2043220448
m_currShader->CopyVariable(m_currShader->GetImplArgBufPtr(), GetSymbol(I->getArgOperand(0)));
2043320449
}
2043420450

20451+
// Emits GenISA_SetStackCallsBaseAddress: initializes the kernel stack for the
// function containing the intrinsic, passing the symbol of operand 0 as the
// stack buffer base. InitializeKernelStack() falls back to the shader's
// private base when the supplied base is null.
void EmitPass::emitSetStackCallsBaseAddress(llvm::GenIntrinsicInst* I)
20452+
{
20453+
InitializeKernelStack(I->getFunction(), GetSymbol(I->getArgOperand(0)));
20454+
}
20455+
20456+
// Emits GenISA_SaveInReservedArgSpace: copies the intrinsic's data operand
// into the ARGV alias reserved for the intrinsic's slot.
// Emits nothing when the shader has no stack calls or when the encoder is
// processing the second half.
// NOTE(review): GetARGVReservedVariable() can return nullptr for a slot with
// no reservation in module metadata; how createAliasIfNeeded() treats a null
// variable is not visible here - confirm.
void EmitPass::emitSaveInReservedArgSpace(llvm::SaveInReservedArgSpaceIntrinsic* I)
20457+
{
20458+
if (!m_currShader->HasStackCalls() || m_encoder->IsSecondHalf())
20459+
return;
20460+
20461+
m_encoder->Copy(
20462+
m_currShader->createAliasIfNeeded(I->getData(), m_currShader->GetARGVReservedVariable(I->getSlot())),
20463+
GetSymbol(I->getData())
20464+
);
20465+
m_encoder->Push();
20466+
}
20467+
20468+
// Emits GenISA_ReadFromReservedArgSpace: copies the ARGV alias reserved for
// the intrinsic's slot into m_destination.
// Emits nothing when the encoder is processing the second half.
// NOTE(review): unlike emitSaveInReservedArgSpace(), there is no
// HasStackCalls() guard here, and the slot alias may be nullptr when no
// reservation exists - confirm both are intended.
void EmitPass::emitReadFromReservedArgSpace(llvm::ReadFromReservedArgSpaceIntrinsic* I)
20469+
{
20470+
if (m_encoder->IsSecondHalf())
20471+
return;
20472+
20473+
m_encoder->Copy(
20474+
m_destination,
20475+
m_currShader->createAliasIfNeeded(I, m_currShader->GetARGVReservedVariable(I->getSlot()))
20476+
);
20477+
m_encoder->Push();
20478+
}
20479+
2043520480
void EmitPass::emitStoreLocalIdBufferPtr(llvm::GenIntrinsicInst* I)
2043620481
{
2043720482
if(m_currShader->HasStackCalls() &&

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ class EmitPass : public llvm::FunctionPass
158158
void emitStackCall(llvm::CallInst* inst);
159159
void emitStackFuncEntry(llvm::Function* F);
160160
void emitStackFuncExit(llvm::ReturnInst* inst);
161-
void InitializeKernelStack(llvm::Function* pKernel);
161+
void InitializeKernelStack(llvm::Function* pKernel, CVariable* stackBufferBase = nullptr);
162162

163163
/// stack-call functions for reading and writing argument/retval data to stack
164164
typedef SmallVector<std::tuple<CVariable*, uint32_t, uint32_t, uint32_t, bool>, 8> StackDataBlocks;
@@ -609,6 +609,9 @@ class EmitPass : public llvm::FunctionPass
609609
void emitLaunder(llvm::GenIntrinsicInst* GII);
610610
void emitImplicitArgIntrinsic(llvm::GenIntrinsicInst* I);
611611
void emitStoreImplBufferPtr(llvm::GenIntrinsicInst* I);
612+
void emitSetStackCallsBaseAddress(llvm::GenIntrinsicInst* I);
613+
void emitSaveInReservedArgSpace(llvm::SaveInReservedArgSpaceIntrinsic* I);
614+
void emitReadFromReservedArgSpace(llvm::ReadFromReservedArgSpaceIntrinsic* I);
612615
void emitStoreLocalIdBufferPtr(llvm::GenIntrinsicInst* I);
613616
void emitLoadImplBufferPtr(llvm::GenIntrinsicInst* I);
614617
void emitLoadLocalIdBufferPtr(llvm::GenIntrinsicInst* I);

IGC/Compiler/CISACodeGen/GenCodeGenModule.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1407,8 +1407,7 @@ InlineCost SubroutineInliner::getInlineCost(IGCLLVM::CallSiteRef CS)
14071407
if (pCtx->m_enableSubroutine == false)
14081408
return llvm::InlineCost::getAlways("Disabled subroutines/stackcalls");
14091409

1410-
if (pCtx->type == ShaderType::OPENCL_SHADER &&
1411-
Callee->hasFnAttribute(llvm::Attribute::NoInline))
1410+
if (Callee->hasFnAttribute(llvm::Attribute::NoInline))
14121411
return llvm::InlineCost::getNever("Per NoInline function attribute");
14131412

14141413
if (Callee->hasFnAttribute("KMPLOCK"))

IGC/Compiler/CISACodeGen/ShaderCodeGen.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,8 @@ class CShader
241241
CVariable* GetSP();
242242
CVariable* GetFP();
243243
CVariable* GetPrevFP();
244+
CVariable* GetARGVReservedVariable(ARG_SPACE_RESERVATION_SLOTS slot);
245+
uint32_t GetARGVReservedVariablesTotalSize();
244246
CVariable* GetARGV();
245247
CVariable* GetRETV();
246248
CVariable* GetPrivateBase();
@@ -730,6 +732,8 @@ class CShader
730732
CVariable* m_FP;
731733
CVariable* m_SavedFP;
732734
CVariable* m_ARGV;
735+
std::array<CVariable*, NUM_ARG_SPACE_RESERVATION_SLOTS> m_ARGVReservedVariables{};
736+
uint32_t m_ARGVReservedVariablesTotalSize = 0;
733737
CVariable* m_RETV;
734738
CVariable* m_SavedSRetPtr;
735739
CVariable* m_ImplArgBufPtr;

IGC/Compiler/CodeGenContext.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,7 @@ namespace IGC
542542
}
543543
}
544544
m_enableSubroutine = EnableSubroutine;
545-
m_hasStackCalls = EnableStackFuncs;
545+
m_hasStackCalls |= EnableStackFuncs;
546546
}
547547

548548
// check if DP emu is required

IGC/Compiler/CodeGenPublic.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ namespace IGC
160160

161161
unsigned int m_numThreads = 0;
162162

163+
unsigned int m_perThreadArgumentStackSize = 0;
164+
163165
void Destroy()
164166
{
165167
if (m_programBin)

IGC/Compiler/CodeGenPublicEnums.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,12 @@ namespace IGC
330330
WO_YZX = 4,
331331
WO_ZYX = 5
332332
};
333+
334+
// Indices of the reserved slots in the stack-call ARGV variable.
// The enumerators index ModuleMetaData::argRegisterReservations and
// CShader::m_ARGVReservedVariables.
// NUM_ARG_SPACE_RESERVATION_SLOTS is the slot count (used as the size of
// those arrays) and must stay last.
enum ARG_SPACE_RESERVATION_SLOTS {
335+
RTX_GLOBAL_BUFFER_PTR,
336+
337+
NUM_ARG_SPACE_RESERVATION_SLOTS
338+
};
333339
}
334340

335341
#endif //CODE_GEN_PUBLIC_ENUMS_H_

IGC/Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryResolution.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -330,12 +330,12 @@ bool PrivateMemoryResolution::runOnModule(llvm::Module& M)
330330
// Analyze call depth for stack memory required
331331
maxPrivateMem = AnalyzeCGPrivateMemUsage(pKernel);
332332
}
333-
if ((FG->hasIndirectCall() && FG->hasPartialCallGraph()) || FG->hasRecursion())
333+
if (((FG->hasIndirectCall() && FG->hasPartialCallGraph()) || FG->hasRecursion()) && Ctx.type != ShaderType::RAYTRACING_SHADER)
334334
{
335335
// If indirect calls or recursions exist, add additional 4KB and hope we don't run out.
336336
maxPrivateMem += (4 * 1024);
337337
}
338-
if (FG->hasVariableLengthAlloca())
338+
if (FG->hasVariableLengthAlloca() && Ctx.type != ShaderType::RAYTRACING_SHADER)
339339
{
340340
expandPrivateMemoryForVla(maxPrivateMem);
341341
}

IGC/GenISAIntrinsics/GenIntrinsicInst.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1768,8 +1768,53 @@ class EmitHitAttributesIntrinstic: public GenIntrinsicInst
17681768
{
17691769
return isa<GenIntrinsicInst>(V) && classof(cast<GenIntrinsicInst>(V));
17701770
}
1771+
};
1772+
1773+
// Typed view over a GenISA_SaveInReservedArgSpace intrinsic call.
// Operand 0 is the reservation slot and operand 1 is the value to save.
class SaveInReservedArgSpaceIntrinsic : public GenIntrinsicInst
1774+
{
1775+
public:
1776+
// Methods for support type inquiry through isa, cast, and dyn_cast:
1777+
static inline bool classof(const GenIntrinsicInst* I)
1778+
{
1779+
GenISAIntrinsic::ID ID = I->getIntrinsicID();
1780+
return ID == GenISAIntrinsic::GenISA_SaveInReservedArgSpace;
1781+
}
1782+
1783+
static inline bool classof(const Value* V)
1784+
{
1785+
return isa<GenIntrinsicInst>(V) && classof(cast<GenIntrinsicInst>(V));
1786+
}
1787+
1788+
// Returns operand 0 converted to a slot enumerator. The cast<ConstantInt>
// requires the operand to be a compile-time constant integer.
IGC::ARG_SPACE_RESERVATION_SLOTS getSlot()
1789+
{
1790+
return static_cast<IGC::ARG_SPACE_RESERVATION_SLOTS>(cast<ConstantInt>(getOperand(0))->getZExtValue());
1791+
}
1792+
1793+
// Returns operand 1, the value to be stored in the reserved slot.
Value* getData()
1794+
{
1795+
return getOperand(1);
1796+
}
1797+
};
1798+
1799+
// Typed view over a GenISA_ReadFromReservedArgSpace intrinsic call.
// Operand 0 is the reservation slot to read from.
class ReadFromReservedArgSpaceIntrinsic : public GenIntrinsicInst
1800+
{
1801+
public:
1802+
// Methods for support type inquiry through isa, cast, and dyn_cast:
1803+
static inline bool classof(const GenIntrinsicInst* I)
1804+
{
1805+
GenISAIntrinsic::ID ID = I->getIntrinsicID();
1806+
return ID == GenISAIntrinsic::GenISA_ReadFromReservedArgSpace;
1807+
}
17711808

1809+
static inline bool classof(const Value* V)
1810+
{
1811+
return isa<GenIntrinsicInst>(V) && classof(cast<GenIntrinsicInst>(V));
1812+
}
17721813

1814+
// Returns operand 0 converted to a slot enumerator. The cast<ConstantInt>
// requires the operand to be a compile-time constant integer.
IGC::ARG_SPACE_RESERVATION_SLOTS getSlot()
1815+
{
1816+
return static_cast<IGC::ARG_SPACE_RESERVATION_SLOTS>(cast<ConstantInt>(getOperand(0))->getZExtValue());
1817+
}
17731818
};
17741819

17751820
template <class X, class Y>

IGC/GenISAIntrinsics/Intrinsic_definitions.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,11 @@
529529
("int", "15: sample index")],
530530
"None"]],
531531
####################################################################################################
532+
"GenISA_ReadFromReservedArgSpace": ["Read from a reserved slot from ARGV variable when using stack calls. Requires a valid entry in module metadata",
533+
[("any", "data"),
534+
[("int", "slot number")],
535+
"InaccessibleMemOnly"]],
536+
####################################################################################################
532537
"GenISA_RenderTargetRead": ["",
533538
[("float4", "result"),
534539
[("int", "render target slot")],
@@ -555,6 +560,17 @@
555560
[("int", "sample index")],
556561
"NoMem"]],
557562
####################################################################################################
563+
"GenISA_SaveInReservedArgSpace": ["Saves value to a reserved slot in ARGV variable when using stack calls. Requires a valid entry in module metadata",
564+
[("any", "result"),
565+
[("int", "slot number"),
566+
("any", "data")],
567+
"InaccessibleMemOnly"]],
568+
####################################################################################################
569+
"GenISA_SetStackCallsBaseAddress": ["Set the base value for StackAlloca calculations. This address should not be calculated per thread",
570+
[("void", "result"),
571+
[("anyptr", "buffer pointer passed by runtime")],
572+
"InaccessibleMemOnly"]],
573+
####################################################################################################
558574
"GenISA_SetImplicitBufferPtr": ["",
559575
[("void", "result"),
560576
[("anyptr", "buffer pointer passed by runtime")],

IGC/common/MDFrameWork.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,10 @@ namespace IGC
746746
std::array<uint64_t, NUM_SHADER_RESOURCE_VIEW_SIZE> m_ShaderResourceViewMcsMask{};
747747
unsigned int computedDepthMode = 0; //Defaults to 0 meaning depth mode is off
748748
bool isHDCFastClearShader = false;
749+
750+
std::array<uint32_t, NUM_ARG_SPACE_RESERVATION_SLOTS> argRegisterReservations{};
749751
};
752+
750753
void serialize(const IGC::ModuleMetaData &moduleMD, llvm::Module* module);
751754
void deserialize(IGC::ModuleMetaData &deserializedMD, const llvm::Module* module);
752755

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -894,6 +894,7 @@ DECLARE_IGC_GROUP("Raytracing Options")
894894
DECLARE_IGC_REGKEY(bool, DisablePredicatedStackIDRelease, false, "Emit a single stack ID release at the end of the shader", true)
895895
DECLARE_IGC_REGKEY(bool, DisableCrossFillRemat, false, "Rematerialize values if they use already spilled values", true)
896896
DECLARE_IGC_REGKEY(bool, EnableSyncDispatchRays, false, "Enable sync DispatchRays implementation", false)
897+
DECLARE_IGC_REGKEY(bool, ForceIndirectCallsInSyncDispatchRays, false, "Will skip direct calls in synchronous raytracing and immediately call raytracing shaders via KSP shader ptr", false)
897898
DECLARE_IGC_REGKEY(bool, ForceRTRetry, false, "Raytracing is compiled in the second retry state", false)
898899
DECLARE_IGC_REGKEY(bool, AllowRTRetryPickBetter, false, "Allows raytracing retry to pick the best compilation instead of always using the retry compilation.", false)
899900
DECLARE_IGC_REGKEY(bool, EnableFillScheduling, false, "Schedule fills for reduced register pressure", false)

0 commit comments

Comments
 (0)