Skip to content

Commit 3a4cff2

Browse files
dlei6gigcbot
authored and committed
Only enable CallWA for SIMD32 when nested stackcalls or indirect calls are present (Try #3)
Only enable CallWA for SIMD32 when nested stackcalls or indirect calls are present. Also added FunctionGroupAnalysis Function Group attributes: hasSubroutine() and isIndirectCallGroup(), plus general refactor for FGA and CShader.
1 parent 108bd39 commit 3a4cff2

File tree

8 files changed

+87
-67
lines changed

8 files changed

+87
-67
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4309,7 +4309,7 @@ namespace IGC
43094309
{
43104310
SaveOption(vISA_fusedCallWA, (uint32_t)2);
43114311
}
4312-
else if (m_program->HasStackCalls() || m_program->IsIntelSymbolTableVoidProgram())
4312+
else if (m_program->HasNestedCalls() || m_program->HasIndirectCalls() || m_program->IsIntelSymbolTableVoidProgram())
43134313
{
43144314
SaveOption(vISA_fusedCallWA, (uint32_t)1);
43154315
}

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -687,15 +687,6 @@ bool EmitPass::runOnFunction(llvm::Function& F)
687687
CShader* prevShader = m_pCtx->m_prevShader;
688688
if (isFuncGroupHead)
689689
{
690-
if (hasStackCall)
691-
{
692-
m_currShader->SetHasStackCalls();
693-
}
694-
if (isDummyKernel)
695-
{
696-
m_currShader->SetIsIntelSymbolTableVoidProgram();
697-
}
698-
699690
m_currShader->InitEncoder(m_SimdMode, m_canAbortOnSpill, m_ShaderDispatchMode);
700691
// Pre-analysis pass to be executed before call to visa builder so we can pass scratch space offset
701692
m_currShader->PreAnalysisPass();

IGC/Compiler/CISACodeGen/GenCodeGenModule.cpp

Lines changed: 12 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -586,15 +586,8 @@ FunctionGroup* GenXFunctionGroupAnalysis::getOrCreateIndirectCallGroup(Module* p
586586
{
587587
if (IndirectCallGroup) return IndirectCallGroup;
588588

589-
auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
590-
591-
// Use the dummy kernel if it exists. Otherwise use the unique entry function.
592-
// OCL shaders should always use the dummy kernel.
593589
llvm::Function* defaultKernel = IGC::getIntelSymbolTableVoidProgram(pModule);
594-
if (!defaultKernel && pCtx->type != ShaderType::OPENCL_SHADER)
595-
{
596-
defaultKernel = IGC::getUniqueEntryFunc(pCtx->getMetaDataUtils(), pCtx->getModuleMetaData());
597-
}
590+
598591
// No default kernel found
599592
if (!defaultKernel) return nullptr;
600593

@@ -618,18 +611,15 @@ void GenXFunctionGroupAnalysis::setGroupAttributes()
618611

619612
for (auto FG : Groups)
620613
{
621-
if (FG == IndirectCallGroup)
614+
if (isIndirectCallGroup(FG))
622615
{
623-
// The indirect call group is not a true function group, in that the functions in this group does not have
616+
// The dummy kernel group is not a true function group, in that the functions in this group do not have
624617
// a valid callgraph that connects them. It's a dummy group where all indirectly called functions are contained.
625-
// Therefore, the group attributes are not valid here, since they are not connected to the true groupHead, which
626-
// is the caller kernel. We unset all the FG flags for this group.
618+
// Therefore, the group attributes are not valid here, since they are not connected to the real groupHead, which
619+
// is the caller kernel. We don't set any of the FG attribute flags for this group.
627620
//
628621
// Note, indirect functions in this group can still directly call stackcalls or subroutines, which may also belong
629622
// to this group due to cloning. However we still can't associate all functions in this group with a single callgraph.
630-
631-
// All other flags are already unset by default
632-
FG->m_hasCGAvailable = false;
633623
continue;
634624
}
635625

@@ -649,6 +639,10 @@ void GenXFunctionGroupAnalysis::setGroupAttributes()
649639
FG->m_hasNestedCall = true;
650640
}
651641
}
642+
else if (!isEntryFunc(pMdUtils, F))
643+
{
644+
FG->m_hasSubroutine = true;
645+
}
652646

653647
// check all functions in the group to see if there's a VLA alloca
654648
// function attribute "hasVLA" should be set at ProcessFuncAttributes pass
@@ -682,7 +676,7 @@ void GenXFunctionGroupAnalysis::setGroupAttributes()
682676
// to an external module. We do not know the callgraph in this case.
683677
hasStackCall = true;
684678
FG->m_hasIndirectCall = true;
685-
FG->m_hasCGAvailable = false;
679+
FG->m_hasPartialCallGraph = true;
686680
}
687681
else if (calledF && calledF->hasFnAttribute("referenced-indirectly"))
688682
{
@@ -694,9 +688,10 @@ void GenXFunctionGroupAnalysis::setGroupAttributes()
694688
else if (calledF && calledF->isDeclaration() && calledF->hasFnAttribute("invoke_simd_target"))
695689
{
696690
// Invoke_simd targets use stack call by convention.
691+
// Calling a func decl indicates unknown CG
697692
hasStackCall = true;
698693
FG->m_hasIndirectCall = true;
699-
FG->m_hasCGAvailable = false;
694+
FG->m_hasPartialCallGraph = true;
700695
}
701696

702697
FG->m_hasStackCall |= hasStackCall;

IGC/Compiler/CISACodeGen/GenCodeGenModule.h

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ SPDX-License-Identifier: MIT
88

99
#pragma once
1010
#include "Compiler/MetaDataApi/MetaDataApi.h"
11+
#include "Compiler/CISACodeGen/helper.h"
1112
#include "common/LLVMWarningsPush.hpp"
1213
#include "llvm/IR/ValueHandle.h"
1314
#include "llvm/Pass.h"
@@ -135,6 +136,10 @@ namespace IGC {
135136
bool isSingle() const {
136137
return (Functions.size() == 1 && Functions.front()->size() == 1);
137138
}
139+
/// \brief Function group has a subroutine
140+
bool hasSubroutine() const {
141+
return m_hasSubroutine;
142+
}
138143
/// \brief Function group has a stack call (including indirect calls)
139144
bool hasStackCall() const {
140145
return m_hasStackCall;
@@ -154,9 +159,9 @@ namespace IGC {
154159
bool hasNestedCall() const {
155160
return m_hasNestedCall;
156161
}
157-
/// \brief Function group has indirect calls where the CG is not available (e.g. calls function pointer, or callees in external module)
158-
bool hasCGAvailable() const {
159-
return m_hasCGAvailable;
162+
/// \brief Function group has indirect calls where the full CG is not available (e.g. calls function pointer, or callees in external module)
163+
bool hasPartialCallGraph() const {
164+
return m_hasPartialCallGraph;
160165
}
161166
/// \brief Function group has recursion
162167
bool hasRecursion() const {
@@ -177,13 +182,14 @@ namespace IGC {
177182
void setSimdModeInvalid(SIMDMode Mode);
178183

179184
private:
185+
bool m_hasSubroutine = false;
180186
bool m_hasStackCall = false;
181187
bool m_hasInlineAsm = false;
182188
bool m_hasVariableLengthAlloca = false;
183189
bool m_hasIndirectCall = false;
184190
bool m_hasRecursion = false;
185191
bool m_hasNestedCall = false;
186-
bool m_hasCGAvailable = true;
192+
bool m_hasPartialCallGraph = false;
187193
bool SIMDModeValid[3] = {true, true, true};
188194
};
189195

@@ -265,13 +271,14 @@ namespace IGC {
265271
SubGroupMap[F] = SubGroupHead;
266272
}
267273

268-
bool isIndirectCallGroup(const llvm::Function* F) {
269-
FunctionGroup* FG = getGroup(F);
270-
return FG != nullptr && FG == IndirectCallGroup;
274+
bool isIndirectCallGroup(const FunctionGroup* FG) {
275+
return FG && FG == IndirectCallGroup;
271276
}
272277

273-
FunctionGroup* getIndirectCallGroup() {
274-
return IndirectCallGroup;
278+
bool isIndirectCallGroup(const llvm::Function* F) {
279+
IGC_ASSERT(F);
280+
FunctionGroup* FG = getGroup(F);
281+
return isIndirectCallGroup(FG);
275282
}
276283

277284
/// \brief Check whether this is a group header.

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 21 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -3927,9 +3927,10 @@ namespace IGC
39273927
simd_size = funcInfoMD->getSubGroupSize()->getSIMD_size();
39283928
}
39293929

3930-
bool hasStackCall = m_FGA && m_FGA->getGroup(&F) && m_FGA->getGroup(&F)->hasStackCall();
3931-
bool isIndirectGroup = m_FGA && m_FGA->getGroup(&F) && IGC::isIntelSymbolTableVoidProgram(m_FGA->getGroupHead(&F));
3932-
bool hasSubroutine = m_FGA && m_FGA->getGroup(&F) && !m_FGA->getGroup(&F)->isSingle() && !hasStackCall && !isIndirectGroup;
3930+
auto FG = m_FGA ? m_FGA->getGroup(&F) : nullptr;
3931+
bool hasStackCall = FG && FG->hasStackCall();
3932+
bool isIndirectGroup = FG && m_FGA->isIndirectCallGroup(FG);
3933+
bool hasSubroutine = FG && !FG->isSingle() && !hasStackCall && !isIndirectGroup;
39333934
bool forceLowestSIMDForStackCalls = IGC_IS_FLAG_ENABLED(ForceLowestSIMDForStackCalls) && (hasStackCall || isIndirectGroup);
39343935

39353936
if (simd_size == 0)
@@ -4098,26 +4099,31 @@ namespace IGC
40984099
}
40994100
}
41004101

4101-
bool hasStackCall = m_FGA && m_FGA->getGroup(&F) && m_FGA->getGroup(&F)->hasStackCall();
4102-
bool isIndirectGroup = m_FGA && m_FGA->getGroup(&F) && IGC::isIntelSymbolTableVoidProgram(m_FGA->getGroupHead(&F));
4103-
bool hasSubroutine = m_FGA && m_FGA->getGroup(&F) && !m_FGA->getGroup(&F)->isSingle() && !hasStackCall && !isIndirectGroup;
4102+
auto FG = m_FGA ? m_FGA->getGroup(&F) : nullptr;
4103+
bool hasStackCall = FG && FG->hasStackCall();
4104+
bool isIndirectGroup = FG && m_FGA->isIndirectCallGroup(FG);
4105+
bool hasSubroutine = FG && !FG->isSingle() && !hasStackCall && !isIndirectGroup;
41044106

41054107
// If stack calls are present, disable simd32 in order to do CallWA in visa
41064108
if (IGC_IS_FLAG_ENABLED(EnableCallWA) &&
41074109
pCtx->platform.hasFusedEU() &&
41084110
pCtx->platform.getWATable().Wa_14016243945 == false &&
4109-
simdMode == SIMDMode::SIMD32 &&
4110-
(hasStackCall || isIndirectGroup))
4111+
simdMode == SIMDMode::SIMD32)
41114112
{
4112-
// If sub_group_size is set to 32, resize it to 16 so SIMD16 compilation will still succeed
4113-
if (simd_size == 32)
4113+
bool hasNestedCall = FG && FG->hasNestedCall();
4114+
bool hasIndirectCall = FG && FG->hasIndirectCall();
4115+
if (hasNestedCall || hasIndirectCall || isIndirectGroup)
41144116
{
4115-
llvm::Function* Kernel = m_FGA->getGroup(&F)->getHead();
4116-
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
4117-
funcInfoMD->getSubGroupSize()->setSIMD_size(16);
4117+
// If sub_group_size is set to 32, resize it to 16 so SIMD16 compilation will still succeed
4118+
if (simd_size == 32)
4119+
{
4120+
llvm::Function* Kernel = FG->getHead();
4121+
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
4122+
funcInfoMD->getSubGroupSize()->setSIMD_size(16);
4123+
}
4124+
pCtx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
4125+
return SIMDStatus::SIMD_FUNC_FAIL;
41184126
}
4119-
pCtx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
4120-
return SIMDStatus::SIMD_FUNC_FAIL;
41214127
}
41224128

41234129
if (simd_size == 0)

IGC/Compiler/CISACodeGen/ShaderCodeGen.hpp

Lines changed: 30 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -281,10 +281,33 @@ class CShader
281281
void SetEmitPassHelper(EmitPass* EP) { m_EmitPass = EP; }
282282
void SetDominatorTreeHelper(llvm::DominatorTree* DT) { m_DT = DT; }
283283
void SetDataLayout(const llvm::DataLayout* DL) { m_DL = DL; }
284-
void SetFunctionGroupAnalysis(GenXFunctionGroupAnalysis* FGA) { m_FGA = FGA; }
285284
void SetVariableReuseAnalysis(VariableReuseAnalysis* VRA) { m_VRA = VRA; }
286285
void SetMetaDataUtils(IGC::IGCMD::MetaDataUtils* pMdUtils) { m_pMdUtils = pMdUtils; }
287286
void SetScratchSpaceSize(uint size) { m_ScratchSpaceSize = size; }
287+
288+
// Set FGA and also FunctionGroup attributes
289+
void SetFunctionGroupAnalysis(GenXFunctionGroupAnalysis* FGA)
290+
{
291+
m_FGA = FGA;
292+
FunctionGroup* FG = (FGA && entry) ? FGA->getGroupForHead(entry) : nullptr;
293+
if (FG)
294+
{
295+
m_HasStackCall = FG->hasStackCall();
296+
m_HasIndirectCall = FG->hasIndirectCall();
297+
m_HasNestedCall = FG->hasNestedCall();
298+
m_IsIntelSymbolTableVoidProgram = FGA->isIndirectCallGroup(FG);
299+
}
300+
if (IGC_IS_FLAG_ENABLED(ForceAddingStackcallKernelPrerequisites))
301+
{
302+
m_HasStackCall = true;
303+
}
304+
}
305+
306+
bool HasStackCalls() const { return m_HasStackCall; }
307+
bool HasNestedCalls() const { return m_HasNestedCall; }
308+
bool HasIndirectCalls() const { return m_HasIndirectCall; }
309+
bool IsIntelSymbolTableVoidProgram() const { return m_IsIntelSymbolTableVoidProgram; }
310+
288311
IGCMD::MetaDataUtils* GetMetaDataUtils() { return m_pMdUtils; }
289312

290313
virtual void SetShaderSpecificHelper(EmitPass* emitPass) { IGC_UNUSED(emitPass); }
@@ -576,11 +599,6 @@ class CShader
576599
unsigned int GetScalarTypeSizeInRegisterInBits(const llvm::Type* Ty) const;
577600
unsigned int GetScalarTypeSizeInRegister(const llvm::Type* Ty) const;
578601

579-
bool HasStackCalls() const { return m_HasStackCalls; }
580-
void SetHasStackCalls() { m_HasStackCalls = true; }
581-
bool IsIntelSymbolTableVoidProgram() const { return m_isIntelSymbolTableVoidProgram; }
582-
void SetIsIntelSymbolTableVoidProgram() { m_isIntelSymbolTableVoidProgram = true; }
583-
584602
////////////////////////////////////////////////////////////////////
585603
// NOTE: for vector load/stores instructions pass the
586604
// optional instruction argument checks additional constraints
@@ -737,11 +755,15 @@ class CShader
737755

738756
DebugInfoData diData;
739757

740-
bool m_HasStackCalls = false;
741-
bool m_isIntelSymbolTableVoidProgram = false;
742758
// Shader has LSC store messages with non-default L1 cache control
743759
bool m_HasLscStoresWithNonDefaultL1CacheControls = false;
744760
bool m_HasSample = false;
761+
762+
// Program function attributes
763+
bool m_HasStackCall = false;
764+
bool m_HasNestedCall = false;
765+
bool m_HasIndirectCall = false;
766+
bool m_IsIntelSymbolTableVoidProgram = false;
745767
};
746768

747769
struct SInstContext

IGC/Compiler/ModuleAllocaAnalysis.cpp

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -103,12 +103,10 @@ bool ModuleAllocaAnalysis::safeToUseScratchSpace() const
103103
if (bOCLLegacyStatelessCheck) {
104104
if (auto * FGA = getAnalysisIfAvailable<GenXFunctionGroupAnalysis>()) {
105105
if (FGA->getModule() == M) {
106-
if (FGA->getIndirectCallGroup() != nullptr)
107-
return false;
108106
for (auto& I : *FGA) {
109-
if (I->hasStackCall())
107+
if (FGA->isIndirectCallGroup(I) && !I->isSingle())
110108
return false;
111-
if (I->hasVariableLengthAlloca())
109+
if (I->hasStackCall() || I->hasVariableLengthAlloca())
112110
return false;
113111
}
114112
}

IGC/Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryResolution.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -156,9 +156,10 @@ bool PrivateMemoryResolution::runOnModule(llvm::Module& M)
156156
{
157157
continue;
158158
}
159-
bool hasStackCall = (FGA && FGA->getGroup(m_currFunction) && FGA->getGroup(m_currFunction)->hasStackCall()) || m_currFunction->hasFnAttribute("visaStackCall");
160-
bool hasVLA = (FGA && FGA->getGroup(m_currFunction) && FGA->getGroup(m_currFunction)->hasVariableLengthAlloca()) || m_currFunction->hasFnAttribute("hasVLA");
161-
bool isIndirectGroup = FGA && FGA->getGroup(m_currFunction) && isIntelSymbolTableVoidProgram(FGA->getGroupHead(m_currFunction));
159+
auto FG = FGA ? FGA->getGroup(m_currFunction) : nullptr;
160+
bool hasStackCall = (FG && FG->hasStackCall()) || m_currFunction->hasFnAttribute("visaStackCall");
161+
bool hasVLA = (FG && FG->hasVariableLengthAlloca()) || m_currFunction->hasFnAttribute("hasVLA");
162+
bool isIndirectGroup = FG && FGA->isIndirectCallGroup(FG);
162163
if (Ctx.platform.hasScratchSurface() &&
163164
modMD.compOpt.UseScratchSpacePrivateMemory)
164165
{
@@ -283,7 +284,7 @@ bool PrivateMemoryResolution::runOnModule(llvm::Module& M)
283284
// Analyze call depth for stack memory required
284285
maxPrivateMem = AnalyzeCGPrivateMemUsage(pKernel);
285286
}
286-
if (!FG->hasCGAvailable() || FG->hasRecursion())
287+
if ((FG->hasIndirectCall() && FG->hasPartialCallGraph()) || FG->hasRecursion())
287288
{
288289
// If indirect calls or recursions exist, add additional 4KB and hope we don't run out.
289290
maxPrivateMem += (4 * 1024);

0 commit comments

Comments
 (0)