Skip to content

Commit 62c6fdc

Browse files
dlei6gigcbot
authored and committed
Switch ordering of function call optimizations
Moving PruneUnusedArgumentPass up to right after function cloning, and then running InstructionCombining, can potentially remove alloca pointers that are passed as function arguments but are unused. This frees up memory on the stack frame, since those allocas no longer need to be stored in private memory.
1 parent 0d2de10 commit 62c6fdc

File tree

2 files changed

+24
-25
lines changed

2 files changed

+24
-25
lines changed

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,30 @@ static void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSi
685685
mpm.add(llvm::createGlobalDCEPass());
686686
mpm.add(new PurgeMetaDataUtils());
687687
mpm.add(createGenXCodeGenModulePass());
688+
689+
// Light cleanup for subroutines after cloning. Note that the constant
690+
// propagation order is reversed, compared to the opt sequence in
691+
// OptimizeIR. There is a substantial gain with CFG simplification after
692+
// interprocedural constant propagation.
693+
if (!isOptDisabled)
694+
{
695+
mpm.add(createPruneUnusedArgumentsPass());
696+
#if LLVM_VERSION_MAJOR >= 12
697+
mpm.add(createIPSCCPPass());
698+
#else
699+
if (!ctx.m_hasStackCalls)
700+
{
701+
// Don't run IPConstantProp when stackcalls are present.
702+
// Let global constants be relocated inside stack funcs.
703+
// We cannot process SLM constants inside stackcalls, so don't propagate them.
704+
mpm.add(createIPConstantPropagationPass());
705+
}
706+
mpm.add(createConstantPropagationPass());
707+
#endif
708+
mpm.add(createDeadCodeEliminationPass());
709+
mpm.add(createCFGSimplificationPass());
710+
mpm.add(createIGCInstructionCombiningPass());
711+
}
688712
}
689713

690714
// Remove all uses of implicit arg intrinsics after inlining by lowering them to kernel args
@@ -793,30 +817,6 @@ static void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSi
793817
mpm.add(new StatelessToStateful(hasBufOff));
794818
}
795819

796-
// Light cleanup for subroutines after cloning. Note that the constant
797-
// propagation order is reversed, compared to the opt sequence in
798-
// OptimizeIR. There is a substantial gain with CFG simplification after
799-
// interprocedural constant propagation.
800-
if (ctx.m_enableSubroutine && !isOptDisabled)
801-
{
802-
mpm.add(createPruneUnusedArgumentsPass());
803-
804-
#if LLVM_VERSION_MAJOR >= 12
805-
mpm.add(createIPSCCPPass());
806-
#else
807-
if (!ctx.m_hasStackCalls)
808-
{
809-
// Don't run IPConstantProp when stackcalls are present.
810-
// Let global constants be relocated inside stack funcs.
811-
// We cannot process SLM constants inside stackcalls, so don't propagate them.
812-
mpm.add(createIPConstantPropagationPass());
813-
}
814-
mpm.add(createConstantPropagationPass());
815-
#endif
816-
817-
mpm.add(createDeadCodeEliminationPass());
818-
mpm.add(createCFGSimplificationPass());
819-
}
820820
// Since we don't support switch statements, switch lowering is needed after the last CFG simplification
821821
mpm.add(llvm::createLowerSwitchPass());
822822

IGC/Compiler/Optimizer/OpenCLPasses/ProgramScopeConstants/ProgramScopeConstantAnalysis.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,6 @@ void ProgramScopeConstantAnalysis::addData(Constant* initializer,
415415
const unsigned pointedToAddrSpace = WalkCastsToFindNamedAddrSpace(initializer);
416416

417417
IGC_ASSERT(addressSpace == ADDRESS_SPACE_GLOBAL || addressSpace == ADDRESS_SPACE_CONSTANT);
418-
IGC_ASSERT_MESSAGE(pointerSize == 8, "Can global var pointer ever be 32bits?");
419418

420419
// We can only patch global and constant pointers.
421420
if (pointedToAddrSpace == ADDRESS_SPACE_GLOBAL || pointedToAddrSpace == ADDRESS_SPACE_CONSTANT)

0 commit comments

Comments
 (0)