Skip to content

Commit 4bff40d

Browse files
weiyu-chen authored and igcbot committed
Force private memory to global buffer when generic load/store are present (off for now)
1 parent dd3236e commit 4bff40d

File tree

6 files changed

+55
-37
lines changed

6 files changed

+55
-37
lines changed

IGC/Compiler/CISACodeGen/CheckInstrTypes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ CheckInstrTypes::CheckInstrTypes(IGC::SInstrTypes* instrList) : FunctionPass(ID)
103103
instrList->sampleCmpToDiscardOptimizationPossible = false;
104104
instrList->sampleCmpToDiscardOptimizationSlot = 0;
105105
instrList->hasPullBary = false;
106+
instrList->hasDynamicGenericLoadStore = false;
106107
}
107108

108109
void CheckInstrTypes::SetLoopFlags(Function& F)

IGC/Compiler/CISACodeGen/Platform.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,11 @@ unsigned getSlmSizePerSsOrDss() const
461461
return slmSizePerSsOrDss;
462462
}
463463

464+
// Reports whether private memory may be forced into a global buffer.
// Returns true only when both conditions hold:
//   - the platform's render core family is IGFX_GEN9_CORE or newer, and
//   - the ForcePrivateMemoryToGlobalOnGeneric IGC flag is enabled.
bool canForcePrivateToGlobal() const
465+
{
466+
return m_platformInfo.eRenderCoreFamily >= IGFX_GEN9_CORE && IGC_IS_FLAG_ENABLED(ForcePrivateMemoryToGlobalOnGeneric);
467+
}
468+
464469
bool hasNoFullI64Support() const
465470
{
466471
return hasNoInt64Inst();

IGC/Compiler/CodeGenPublic.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ namespace IGC
284284
unsigned int numInsts; //<! measured after optimization, used as a compiler heuristic
285285
unsigned int numAllocaInsts;
286286
unsigned int numPsInputs;
287+
bool hasDynamicGenericLoadStore;
287288
};
288289

289290
struct SSimplePushInfo

IGC/Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.cpp

Lines changed: 42 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -225,8 +225,10 @@ bool GenericAddressDynamicResolution::visitLoadStoreInst(Instruction& I)
225225
IGC_ASSERT_EXIT_MESSAGE(0, "Unable to resolve generic address space pointer");
226226
}
227227

228+
m_ctx->m_instrTypes.hasDynamicGenericLoadStore = true;
229+
228230
if (pointerAddressSpace == ADDRESS_SPACE_GENERIC) {
229-
if (m_ctx->forceGlobalMemoryAllocation() && m_ctx->hasNoLocalToGenericCast())
231+
if ((m_ctx->forceGlobalMemoryAllocation() || m_ctx->platform.canForcePrivateToGlobal()) && m_ctx->hasNoLocalToGenericCast())
230232
{
231233
resolveGASWithoutBranches(I, pointerOperand);
232234
}
@@ -271,27 +273,32 @@ void GenericAddressDynamicResolution::resolveGAS(Instruction& I, Value* pointerO
271273
Value* privateLoad = nullptr;
272274
Value* globalLoad = nullptr;
273275

276+
bool hasPrivate = !m_ctx->platform.canForcePrivateToGlobal();
277+
bool hasLocal = !m_ctx->hasNoLocalToGenericCast();
274278

275279
// Private branch
276-
privateBlock = BasicBlock::Create(I.getContext(), "PrivateBlock", convergeBlock->getParent(), convergeBlock);
280+
if (hasPrivate)
277281
{
278-
IRBuilder<> privateBuilder(privateBlock);
279-
PointerType* ptrType = pointerType->getElementType()->getPointerTo(ADDRESS_SPACE_PRIVATE);
280-
Value* privatePtr = privateBuilder.CreateAddrSpaceCast(pointerOperand, ptrType);
281-
282-
if (LoadInst* LI = dyn_cast<LoadInst>(&I))
282+
privateBlock = BasicBlock::Create(I.getContext(), "PrivateBlock", convergeBlock->getParent(), convergeBlock);
283283
{
284-
privateLoad = privateBuilder.CreateAlignedLoad(privatePtr, getAlign(LI->getAlignment()), LI->isVolatile(), "privateLoad");
285-
}
286-
else if (StoreInst* SI = dyn_cast<StoreInst>(&I))
287-
{
288-
privateBuilder.CreateAlignedStore(I.getOperand(0), privatePtr, getAlign(SI->getAlignment()), SI->isVolatile());
284+
IRBuilder<> privateBuilder(privateBlock);
285+
PointerType* ptrType = pointerType->getElementType()->getPointerTo(ADDRESS_SPACE_PRIVATE);
286+
Value* privatePtr = privateBuilder.CreateAddrSpaceCast(pointerOperand, ptrType);
287+
288+
if (LoadInst* LI = dyn_cast<LoadInst>(&I))
289+
{
290+
privateLoad = privateBuilder.CreateAlignedLoad(privatePtr, getAlign(LI->getAlignment()), LI->isVolatile(), "privateLoad");
291+
}
292+
else if (StoreInst* SI = dyn_cast<StoreInst>(&I))
293+
{
294+
privateBuilder.CreateAlignedStore(I.getOperand(0), privatePtr, getAlign(SI->getAlignment()), SI->isVolatile());
295+
}
296+
privateBuilder.CreateBr(convergeBlock);
289297
}
290-
privateBuilder.CreateBr(convergeBlock);
291298
}
292299

293300
// Local Branch
294-
if (!m_ctx->hasNoLocalToGenericCast())
301+
if (hasLocal)
295302
{
296303
localBlock = BasicBlock::Create(I.getContext(), "LocalBlock", convergeBlock->getParent(), convergeBlock);
297304
// Local
@@ -332,35 +339,33 @@ void GenericAddressDynamicResolution::resolveGAS(Instruction& I, Value* pointerO
332339
currentBlock->getTerminator()->eraseFromParent();
333340
builder.SetInsertPoint(currentBlock);
334341

335-
// Local branch can be saved if there are no local to generic casts
336-
if (m_ctx->hasNoLocalToGenericCast())
337-
{
338-
SwitchInst* switchTag = builder.CreateSwitch(tag, globalBlock, 1);
339-
// Based on tag there are two cases 001: private, 000/111: global
340-
switchTag->addCase(privateTag, privateBlock);
342+
int numPrivateLocal = (hasPrivate && hasLocal) ? 2 : ((hasPrivate || hasLocal) ? 1 : 0);
343+
assert(numPrivateLocal > 0);
341344

342-
if ((privateLoad != nullptr) && (globalLoad != nullptr))
343-
{
344-
IRBuilder<> phiBuilder(&(*convergeBlock->begin()));
345-
PHINode* phi = phiBuilder.CreatePHI(I.getType(), 2, I.getName());
346-
phi->addIncoming(privateLoad, privateBlock);
347-
phi->addIncoming(globalLoad, globalBlock);
348-
I.replaceAllUsesWith(phi);
349-
}
350-
}
351-
else
352345
{
353-
SwitchInst* switchTag = builder.CreateSwitch(tag, globalBlock, 2);
346+
SwitchInst* switchTag = builder.CreateSwitch(tag, globalBlock, numPrivateLocal);
354347
// Based on tag there are up to three cases: 001: private, 010: local, 000/111: global (switch default)
355-
switchTag->addCase(privateTag, privateBlock);
356-
switchTag->addCase(localTag, localBlock);
348+
if (hasPrivate)
349+
{
350+
switchTag->addCase(privateTag, privateBlock);
351+
}
352+
if (hasLocal)
353+
{
354+
switchTag->addCase(localTag, localBlock);
355+
}
357356

358-
if ((privateLoad != nullptr) && (localLoad != nullptr) && (globalLoad != nullptr))
357+
if (isa<LoadInst>(&I))
359358
{
360359
IRBuilder<> phiBuilder(&(*convergeBlock->begin()));
361-
PHINode* phi = phiBuilder.CreatePHI(I.getType(), 3, I.getName());
362-
phi->addIncoming(privateLoad, privateBlock);
363-
phi->addIncoming(localLoad, localBlock);
360+
PHINode* phi = phiBuilder.CreatePHI(I.getType(), numPrivateLocal + 1, I.getName());
361+
if (privateLoad)
362+
{
363+
phi->addIncoming(privateLoad, privateBlock);
364+
}
365+
if (localLoad)
366+
{
367+
phi->addIncoming(localLoad, localBlock);
368+
}
364369
phi->addIncoming(globalLoad, globalBlock);
365370
I.replaceAllUsesWith(phi);
366371
}

IGC/Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryResolution.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,11 @@ bool PrivateMemoryResolution::safeToUseScratchSpace(llvm::Module& M) const
266266
bool supportsStatelessSpacePrivateMemory = Ctx.m_DriverInfo.supportsStatelessSpacePrivateMemory();
267267
bool bOCLLegacyStatelessCheck = true;
268268

269+
if (Ctx.m_instrTypes.hasDynamicGenericLoadStore && Ctx.platform.canForcePrivateToGlobal())
270+
{
271+
return false;
272+
}
273+
269274
if ((modMD.compOpt.OptDisable && bOCLLegacyStatelessCheck) || !supportsScratchSpacePrivateMemory
270275
|| Ctx.forceGlobalMemoryAllocation()) {
271276
return false;

IGC/common/igc_flags.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,7 @@ DECLARE_IGC_REGKEY(DWORD, OverrideOCLMaxParamSize, 0, "Override the value impos
467467
DECLARE_IGC_REGKEY(bool, EnableOptReportPrivateMemoryToSLM, false, "[POC] Generate opt report file for moving private memory allocations to SLM.", false)
468468
DECLARE_IGC_REGKEY(bool, ForceAllPrivateMemoryToSLM, false, "[POC] Force moving all private memory allocations to SLM.", false)
469469
DECLARE_IGC_REGKEY(debugString, ForcePrivateMemoryToSLMOnBuffers, 0, "[POC] Force moving private memory allocations to SLM, semicolon-separated list of buffers.", false)
470+
DECLARE_IGC_REGKEY(bool, ForcePrivateMemoryToGlobalOnGeneric, false, "Force moving private memory allocations to global buffer when generic pointer is present", true)
470471

471472
DECLARE_IGC_GROUP("Generating precompiled headers")
472473
DECLARE_IGC_REGKEY(bool, ApplyConservativeRastWAHeader, true, "Apply WaConservativeRasterization for the platforms enabled", false)

0 commit comments

Comments
 (0)