Skip to content

Commit 8ae4499

Browse files
pkwasnie-inteligcbot
authored andcommitted
Add GEP Loop Strength Reduction pass
Adds new optimization pass reducing strength of GEP instructions in loop. Xe architecture doesn't have native support for access from "base pointer plus offset". Codegen must translate GEP instruction "getelementptr %constantPointer, %variableOffset" to "(long long)pointer + offset*sizeof(*pointer)", generating additional mov/add/shl instructions. If GEP is inside a loop and offset is incremented in constant steps, it is beneficial to change GEP into induction variable "getelementptr %variablePointer, %constantOffset". This change adds new pass translating code: int id = get_global_id(0); for (int i = 32; i < n_iters - 32; i += 32) { output[id + i] = output[id + i + 32] * output[id + i - 32]; } Into: int id = get_global_id(0); global float* outputm32 = output + id; for (int i = 32; i < n_iters - 32; i += 32, outputm32 += 32) { *(outputm32 + 32) = *(outputm32 + 64) * *outputm32; }
1 parent ab04ed5 commit 8ae4499

25 files changed

+2610
-6
lines changed

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ SPDX-License-Identifier: MIT
8383
#include "Compiler/Optimizer/OpenCLPasses/UnreachableHandling/UnreachableHandling.hpp"
8484
#include "Compiler/Optimizer/OpenCLPasses/WIFuncs/WIFuncResolution.hpp"
8585
#include "Compiler/Optimizer/OpenCLPasses/ScalarArgAsPointer/ScalarArgAsPointer.hpp"
86+
#include "Compiler/Optimizer/OpenCLPasses/GEPLoopStrengthReduction/GEPLoopStrengthReduction.hpp"
8687
#include "Compiler/Optimizer/MCSOptimization.hpp"
8788
#include "Compiler/Optimizer/GatingSimilarSamples.hpp"
8889
#include "Compiler/Optimizer/IntDivConstantReduction.hpp"
@@ -355,6 +356,15 @@ static void UpdateInstTypeHint(CodeGenContext& ctx)
355356
// forward declaration
356357
llvm::ModulePass* createPruneUnusedArgumentsPass();
357358

359+
static bool useStatelessToStateful(CodeGenContext& ctx)
360+
{
361+
return (ctx.m_instrTypes.hasLoadStore &&
362+
ctx.m_DriverInfo.SupportsStatelessToStatefulBufferTransformation() &&
363+
!ctx.getModuleMetaData()->compOpt.GreaterThan4GBBufferRequired &&
364+
IGC_IS_FLAG_ENABLED(EnableStatelessToStateful) &&
365+
!ctx.m_instrTypes.hasInlineAsmPointerAccess);
366+
}
367+
358368
void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSignature* pSignature)
359369
{
360370
COMPILER_TIME_START(&ctx, TIME_CG_Add_Legalization_Passes);
@@ -705,12 +715,7 @@ void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSignature
705715
mpm.add(new PromoteStatelessToBindless());
706716
}
707717

708-
if (!isOptDisabled &&
709-
ctx.m_instrTypes.hasLoadStore &&
710-
ctx.m_DriverInfo.SupportsStatelessToStatefulBufferTransformation() &&
711-
!ctx.getModuleMetaData()->compOpt.GreaterThan4GBBufferRequired &&
712-
IGC_IS_FLAG_ENABLED(EnableStatelessToStateful) &&
713-
!ctx.m_instrTypes.hasInlineAsmPointerAccess)
718+
if (!isOptDisabled && useStatelessToStateful(ctx))
714719
{
715720
mpm.add(new StatelessToStateful());
716721
}
@@ -1485,6 +1490,15 @@ void OptimizeIR(CodeGenContext* const pContext)
14851490
mpm.add(createSROAPass());
14861491
}
14871492
}
1493+
1494+
if (pContext->type == ShaderType::OPENCL_SHADER &&
1495+
pContext->platform.isCoreChildOf(IGFX_XE_HPC_CORE) &&
1496+
!useStatelessToStateful(*pContext) &&
1497+
pContext->m_retryManager.IsFirstTry())
1498+
{
1499+
mpm.add(createGEPLoopStrengthReductionPass(IGC_IS_FLAG_ENABLED(allowLICM) &&
1500+
pContext->m_retryManager.AllowLICM()));
1501+
}
14881502
}
14891503

14901504
// Note:

IGC/Compiler/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ void initializeGenericAddressAnalysisPass(llvm::PassRegistry&);
6666
void initializeGenericAddressDynamicResolutionPass(llvm::PassRegistry&);
6767
void initializeGenFDIVEmulationPass(llvm::PassRegistry&);
6868
void initializeGenIRLoweringPass(llvm::PassRegistry&);
69+
void initializeGEPLoopStrengthReductionPass(llvm::PassRegistry&);
6970
void initializeGEPLoweringPass(llvm::PassRegistry&);
7071
void initializeGenSpecificPatternPass(llvm::PassRegistry&);
7172
void initializeGreedyLiveRangeReductionPass(llvm::PassRegistry&);

IGC/Compiler/Optimizer/OpenCLPasses/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ add_subdirectory(DeviceEnqueueFuncs)
2020
add_subdirectory(DisableLoopUnrollOnRetry)
2121
add_subdirectory(ExtensionFuncs)
2222
add_subdirectory(GenericAddressResolution)
23+
add_subdirectory(GEPLoopStrengthReduction)
2324
add_subdirectory(ImageFuncs)
2425
add_subdirectory(LocalBuffers)
2526
add_subdirectory(NamedBarriers)
@@ -70,6 +71,7 @@ set(IGC_BUILD__SRC__Optimizer_OpenCLPasses_All
7071
${IGC_BUILD__SRC__OpenCLPasses_DpasFuncs}
7172
${IGC_BUILD__SRC__OpenCLPasses_ExtensionFuncs}
7273
${IGC_BUILD__SRC__OpenCLPasses_GenericAddressResolution}
74+
${IGC_BUILD__SRC__OpenCLPasses_GEPLoopStrengthReduction}
7375
${IGC_BUILD__SRC__OpenCLPasses_ImageFuncs}
7476
${IGC_BUILD__SRC__OpenCLPasses_LocalBuffers}
7577
${IGC_BUILD__SRC__OpenCLPasses_LowerLocalMemPool}
@@ -125,6 +127,7 @@ set(IGC_BUILD__HDR__Optimizer_OpenCLPasses_All
125127
${IGC_BUILD__HDR__OpenCLPasses_DpasFuncs}
126128
${IGC_BUILD__HDR__OpenCLPasses_ExtensionFuncs}
127129
${IGC_BUILD__HDR__OpenCLPasses_GenericAddressResolution}
130+
${IGC_BUILD__HDR__OpenCLPasses_GEPLoopStrengthReduction}
128131
${IGC_BUILD__HDR__OpenCLPasses_ImageFuncs}
129132
${IGC_BUILD__HDR__OpenCLPasses_LocalBuffers}
130133
${IGC_BUILD__HDR__OpenCLPasses_LowerLocalMemPool}
@@ -173,6 +176,7 @@ set(IGC_BUILD_Compiler_OpenCLPasses_Groups
173176
Compiler__OpenCLPasses_ExtensionFuncs
174177
Compiler__OpenCLPasses_GenericAddressResolution
175178
Compiler__OpenCLPasses_GenericAddressSpaceStaticResolution
179+
Compiler__OpenCLPasses_GEPLoopStrengthReduction
176180
Compiler__OpenCLPasses_ImageFuncs
177181
Compiler__OpenCLPasses_LocalBuffers
178182
Compiler__OpenCLPasses_LowerLocalMemPool
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#=========================== begin_copyright_notice ============================
2+
#
3+
# Copyright (C) 2023 Intel Corporation
4+
#
5+
# SPDX-License-Identifier: MIT
6+
#
7+
#============================ end_copyright_notice =============================
8+
9+
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
10+
11+
12+
set(IGC_BUILD__SRC__GEPLoopStrengthReduction
13+
"${CMAKE_CURRENT_SOURCE_DIR}/GEPLoopStrengthReduction.cpp"
14+
)
15+
set(IGC_BUILD__SRC__OpenCLPasses_GEPLoopStrengthReduction ${IGC_BUILD__SRC__GEPLoopStrengthReduction} PARENT_SCOPE)
16+
17+
set(IGC_BUILD__HDR__GEPLoopStrengthReduction
18+
"${CMAKE_CURRENT_SOURCE_DIR}/GEPLoopStrengthReduction.hpp"
19+
)
20+
set(IGC_BUILD__HDR__OpenCLPasses_GEPLoopStrengthReduction ${IGC_BUILD__HDR__GEPLoopStrengthReduction} PARENT_SCOPE)
21+
22+
23+
igc_sg_register(
24+
Compiler__OpenCLPasses_GEPLoopStrengthReduction
25+
"GEPLoopStrengthReduction"
26+
FILES
27+
${IGC_BUILD__SRC__GEPLoopStrengthReduction}
28+
${IGC_BUILD__HDR__GEPLoopStrengthReduction}
29+
)

0 commit comments

Comments
 (0)