Skip to content

Commit f2125de

Browse files
dlei6gigcbot
authored and committed
Only enable CallWA for SIMD32 when nested stackcalls or indirect calls are present
Only enable CallWA for SIMD32 when nested stackcalls or indirect calls are present. Also added two FunctionGroupAnalysis Function Group attributes: hasSubroutine() and isIndirectCallGroup().
1 parent 1ede35b commit f2125de

File tree

6 files changed

+57
-31
lines changed

6 files changed

+57
-31
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4301,7 +4301,7 @@ namespace IGC
43014301
{
43024302
SaveOption(vISA_fusedCallWA, (uint32_t)2);
43034303
}
4304-
else if (m_program->HasStackCalls() || m_program->IsIntelSymbolTableVoidProgram())
4304+
else if (m_program->HasNestedCalls() || m_program->HasIndirectCalls() || m_program->IsIntelSymbolTableVoidProgram())
43054305
{
43064306
SaveOption(vISA_fusedCallWA, (uint32_t)1);
43074307
}

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -687,15 +687,6 @@ bool EmitPass::runOnFunction(llvm::Function& F)
687687
CShader* prevShader = m_pCtx->m_prevShader;
688688
if (isFuncGroupHead)
689689
{
690-
if (hasStackCall)
691-
{
692-
m_currShader->SetHasStackCalls();
693-
}
694-
if (isDummyKernel)
695-
{
696-
m_currShader->SetIsIntelSymbolTableVoidProgram();
697-
}
698-
699690
m_currShader->InitEncoder(m_SimdMode, m_canAbortOnSpill, m_ShaderDispatchMode);
700691
// Pre-analysis pass to be executed before call to visa builder so we can pass scratch space offset
701692
m_currShader->PreAnalysisPass();

IGC/Compiler/CISACodeGen/GenCodeGenModule.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,7 @@ void GenXFunctionGroupAnalysis::setGroupAttributes()
630630
// to this group due to cloning. However we still can't associate all functions in this group with a single callgraph.
631631

632632
// All other flags are already unset by default
633+
FG->m_isIndirectCallGroup = true;
633634
FG->m_hasCGAvailable = false;
634635
continue;
635636
}
@@ -650,6 +651,10 @@ void GenXFunctionGroupAnalysis::setGroupAttributes()
650651
FG->m_hasNestedCall = true;
651652
}
652653
}
654+
else if (!isEntryFunc(pMdUtils, F))
655+
{
656+
FG->m_hasSubroutine = true;
657+
}
653658

654659
// check all functions in the group to see if there's a VLA alloca
655660
// function attribute "hasVLA" should be set at ProcessFuncAttributes pass

IGC/Compiler/CISACodeGen/GenCodeGenModule.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ namespace IGC {
135135
bool isSingle() const {
136136
return (Functions.size() == 1 && Functions.front()->size() == 1);
137137
}
138+
/// \brief Function group has a subroutine
139+
bool hasSubroutine() const {
140+
return m_hasSubroutine;
141+
}
138142
/// \brief Function group has a stack call (including indirect calls)
139143
bool hasStackCall() const {
140144
return m_hasStackCall;
@@ -162,6 +166,10 @@ namespace IGC {
162166
bool hasRecursion() const {
163167
return m_hasRecursion;
164168
}
169+
/// \brief This is the indirect call group
170+
bool isIndirectCallGroup() const {
171+
return m_isIndirectCallGroup;
172+
}
165173

166174
void replaceGroupHead(llvm::Function* OH, llvm::Function* NH) {
167175
IGC_UNUSED(OH);
@@ -177,13 +185,15 @@ namespace IGC {
177185
void setSimdModeInvalid(SIMDMode Mode);
178186

179187
private:
188+
bool m_hasSubroutine = false;
180189
bool m_hasStackCall = false;
181190
bool m_hasInlineAsm = false;
182191
bool m_hasVariableLengthAlloca = false;
183192
bool m_hasIndirectCall = false;
184193
bool m_hasRecursion = false;
185194
bool m_hasNestedCall = false;
186195
bool m_hasCGAvailable = true;
196+
bool m_isIndirectCallGroup = false;
187197
bool SIMDModeValid[3] = {true, true, true};
188198
};
189199

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3927,9 +3927,10 @@ namespace IGC
39273927
simd_size = funcInfoMD->getSubGroupSize()->getSIMD_size();
39283928
}
39293929

3930-
bool hasStackCall = m_FGA && m_FGA->getGroup(&F) && m_FGA->getGroup(&F)->hasStackCall();
3931-
bool isIndirectGroup = m_FGA && m_FGA->getGroup(&F) && IGC::isIntelSymbolTableVoidProgram(m_FGA->getGroupHead(&F));
3932-
bool hasSubroutine = m_FGA && m_FGA->getGroup(&F) && !m_FGA->getGroup(&F)->isSingle() && !hasStackCall && !isIndirectGroup;
3930+
auto FG = m_FGA ? m_FGA->getGroup(&F) : nullptr;
3931+
bool hasStackCall = FG && FG->hasStackCall();
3932+
bool isIndirectGroup = FG && FG->isIndirectCallGroup();
3933+
bool hasSubroutine = FG && !FG->isSingle() && !hasStackCall && !isIndirectGroup;
39333934
bool forceLowestSIMDForStackCalls = IGC_IS_FLAG_ENABLED(ForceLowestSIMDForStackCalls) && (hasStackCall || isIndirectGroup);
39343935

39353936
if (simd_size == 0)
@@ -4098,26 +4099,31 @@ namespace IGC
40984099
}
40994100
}
41004101

4101-
bool hasStackCall = m_FGA && m_FGA->getGroup(&F) && m_FGA->getGroup(&F)->hasStackCall();
4102-
bool isIndirectGroup = m_FGA && m_FGA->getGroup(&F) && IGC::isIntelSymbolTableVoidProgram(m_FGA->getGroupHead(&F));
4103-
bool hasSubroutine = m_FGA && m_FGA->getGroup(&F) && !m_FGA->getGroup(&F)->isSingle() && !hasStackCall && !isIndirectGroup;
4102+
auto FG = m_FGA ? m_FGA->getGroup(&F) : nullptr;
4103+
bool hasStackCall = FG && FG->hasStackCall();
4104+
bool isIndirectGroup = FG && FG->isIndirectCallGroup();
4105+
bool hasSubroutine = FG && !FG->isSingle() && !hasStackCall && !isIndirectGroup;
41044106

41054107
// If nested stack calls or indirect calls are present, disable simd32 in order to do CallWA in visa
41064108
if (IGC_IS_FLAG_ENABLED(EnableCallWA) &&
41074109
pCtx->platform.hasFusedEU() &&
41084110
pCtx->platform.getWATable().Wa_14016243945 == false &&
4109-
simdMode == SIMDMode::SIMD32 &&
4110-
(hasStackCall || isIndirectGroup))
4111+
simdMode == SIMDMode::SIMD32)
41114112
{
4112-
// If sub_group_size is set to 32, resize it to 16 so SIMD16 compilation will still succeed
4113-
if (simd_size == 32)
4113+
bool hasNestedCall = FG && FG->hasNestedCall();
4114+
bool hasIndirectCall = FG && FG->hasIndirectCall();
4115+
if (hasNestedCall || hasIndirectCall || isIndirectGroup)
41144116
{
4115-
llvm::Function* Kernel = m_FGA->getGroup(&F)->getHead();
4116-
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
4117-
funcInfoMD->getSubGroupSize()->setSIMD_size(16);
4117+
// If sub_group_size is set to 32, resize it to 16 so SIMD16 compilation will still succeed
4118+
if (simd_size == 32)
4119+
{
4120+
llvm::Function* Kernel = FG->getHead();
4121+
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
4122+
funcInfoMD->getSubGroupSize()->setSIMD_size(16);
4123+
}
4124+
pCtx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
4125+
return SIMDStatus::SIMD_FUNC_FAIL;
41184126
}
4119-
pCtx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
4120-
return SIMDStatus::SIMD_FUNC_FAIL;
41214127
}
41224128

41234129
if (simd_size == 0)

IGC/Compiler/CISACodeGen/ShaderCodeGen.hpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -576,10 +576,26 @@ class CShader
576576
unsigned int GetScalarTypeSizeInRegisterInBits(const llvm::Type* Ty) const;
577577
unsigned int GetScalarTypeSizeInRegister(const llvm::Type* Ty) const;
578578

579-
bool HasStackCalls() const { return m_HasStackCalls; }
580-
void SetHasStackCalls() { m_HasStackCalls = true; }
581-
bool IsIntelSymbolTableVoidProgram() const { return m_isIntelSymbolTableVoidProgram; }
582-
void SetIsIntelSymbolTableVoidProgram() { m_isIntelSymbolTableVoidProgram = true; }
579+
inline bool HasStackCalls() const
580+
{
581+
auto FG = m_FGA ? m_FGA->getGroupForHead(entry) : nullptr;
582+
return (FG && FG->hasStackCall()) || IGC_IS_FLAG_ENABLED(ForceAddingStackcallKernelPrerequisites);
583+
}
584+
inline bool IsIntelSymbolTableVoidProgram() const
585+
{
586+
auto FG = m_FGA ? m_FGA->getGroupForHead(entry) : nullptr;
587+
return FG && FG->isIndirectCallGroup();
588+
}
589+
inline bool HasNestedCalls() const
590+
{
591+
auto FG = m_FGA ? m_FGA->getGroupForHead(entry) : nullptr;
592+
return FG && FG->hasNestedCall();
593+
}
594+
inline bool HasIndirectCalls() const
595+
{
596+
auto FG = m_FGA ? m_FGA->getGroupForHead(entry) : nullptr;
597+
return FG && FG->hasIndirectCall();
598+
}
583599

584600
////////////////////////////////////////////////////////////////////
585601
// NOTE: for vector load/stores instructions pass the
@@ -737,8 +753,6 @@ class CShader
737753

738754
DebugInfoData diData;
739755

740-
bool m_HasStackCalls = false;
741-
bool m_isIntelSymbolTableVoidProgram = false;
742756
// Shader has LSC store messages with non-default L1 cache control
743757
bool m_HasLscStoresWithNonDefaultL1CacheControls = false;
744758
bool m_HasSample = false;

0 commit comments

Comments
 (0)