Skip to content

Commit 82a6156

Browse files
aaronkinteligcbot
authored andcommitted
Pass to convert 2D block intrinsics into constituents.
The 2D block IO intrinsics are opaque to the compiler and identical payloads cannot be optimized by the compiler. Intrinsics have been introduced to overcome this problem by allowing the 2D intrinsics to be broken up into constituent stages (payload, set X and Y relative to payload, IO operation). This PR adds a new pass, `Decompose2DBlockFuncs`, which converts the 2D IO intrinsics into the constituents, allowing subsequent passes to optimize the code. At the current stage, the payload intrinsics can be hoisted above loops by LICM, and can be merged by EarlyCSE and others. The `LSC2DBlockSetAddrPayloadField` intrinsics have a mode in which the IO location in memory is specified exactly (relative to the payload), and another mode in which the intrinsic increments the value. When a payload is used by the `LSC2DBlockSetAddrPayloadField` intrinsic in the accumulator mode, the payload cannot be hoisted. Because of this, I have given the `LSC2DBlockCreateAddrPayload` default attributes of `writeonly`, which is conservative for when the SetAddr intrinsic is in accumulator mode. The `LSC2DBlockSetAddrPayloadField` intrinsic added by the `Decompose2DBlockFuncs` pass is always in the mode in which it explicitly specifies the memory location. Since we have this guarantee, the related payload is described as not accessing memory, since the dependencies are all properly accounted for by the data dependency; this allows the desired optimizations to function. The 2D block intrinsics are not decomposed when they do not fall within a loop, or if the payload would have loop-dependent parameters.
1 parent 71165b1 commit 82a6156

File tree

15 files changed

+726
-4
lines changed

15 files changed

+726
-4
lines changed

IGC/AdaptorOCL/UnifyIROCL.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ SPDX-License-Identifier: MIT
105105
#include "Compiler/Optimizer/OpenCLPasses/BfloatFuncs/BfloatFuncsResolution.hpp"
106106
#include "Compiler/Optimizer/OpenCLPasses/DpasFuncs/DpasFuncsResolution.hpp"
107107
#include "Compiler/Optimizer/OpenCLPasses/LSCFuncs/LSCFuncsResolution.hpp"
108+
#include "Compiler/Optimizer/OpenCLPasses/Decompose2DBlockFuncs/Decompose2DBlockFuncs.hpp"
108109
#include "Compiler/Optimizer/OpenCLPasses/NamedBarriers/NamedBarriersResolution.hpp"
109110
#include "Compiler/Optimizer/OpenCLPasses/JointMatrixFuncsResolutionPass/JointMatrixFuncsResolutionPass.h"
110111
#include "Compiler/Optimizer/OpenCLPasses/RayTracing/ResolveOCLRaytracingBuiltins.hpp"
@@ -583,6 +584,12 @@ static void CommonOCLBasedPasses(OpenCLProgramContext* pContext)
583584
// Break down the intrinsics into smaller operations (eg. fmuladd to fmul add)
584585
mpm.add(new BreakdownIntrinsicPass());
585586

587+
// Break down 2D block intrinsics. Should be before a call to LICM. Mostly
588+
// useful when LICM is enabled, so we will consider only that case
589+
if (IGC_IS_FLAG_ENABLED(allowLICM)) {
590+
mpm.add(createDecompose2DBlockFuncsPass());
591+
}
592+
586593
{
587594
if(IGC_IS_FLAG_ENABLED(EnableConstantPromotion))
588595
{

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ SPDX-License-Identifier: MIT
1313
#include "Compiler/Optimizer/OpenCLPasses/NamedBarriers/NamedBarriersResolution.hpp"
1414
#include "Compiler/Optimizer/OpenCLPasses/StackOverflowDetection/StackOverflowDetection.hpp"
1515
#include "Compiler/Optimizer/OpenCLPasses/LSCFuncs/LSCFuncsResolution.hpp"
16+
#include "Compiler/Optimizer/OpenCLPasses/Decompose2DBlockFuncs/Decompose2DBlockFuncs.hpp"
1617
#include "Compiler/CISACodeGen/GenerateFrequencyData.hpp"
1718
#include "AdaptorCommon/RayTracing/RTStackFormat.h"
1819
#include "DeSSA.hpp"

IGC/Compiler/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ void initializeTransformUnmaskedFunctionsPassPass(llvm::PassRegistry&);
156156
void initializeIndirectCallOptimizationPass(llvm::PassRegistry&);
157157
void initializePromoteInt8TypePass(llvm::PassRegistry&);
158158
void initializeDpasFuncsResolutionPass(llvm::PassRegistry&);
159+
void initializeDecompose2DBlockFuncsPass(llvm::PassRegistry&);
159160
void initializeLSCFuncsResolutionPass(llvm::PassRegistry&);
160161
void initializeConvertMSAAPayloadTo16BitPass(llvm::PassRegistry&);
161162
void initializeInterfaceOptimizationPass(llvm::PassRegistry&);

IGC/Compiler/Optimizer/OpenCLPasses/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ add_subdirectory(KernelArgs)
3333
add_subdirectory(KernelFunctionCloning)
3434
add_subdirectory(BufferBoundsChecking)
3535
add_subdirectory(LSCFuncs)
36+
add_subdirectory(Decompose2DBlockFuncs)
3637
add_subdirectory(LocalBuffers)
3738
add_subdirectory(NamedBarriers)
3839
add_subdirectory(NontemporalLoadsAndStoresInAssert)
@@ -66,6 +67,7 @@ set(IGC_BUILD__SRC__Optimizer_OpenCLPasses_All
6667
${IGC_BUILD__SRC__OpenCLPasses_BreakConstantExpr}
6768
${IGC_BUILD__SRC__OpenCLPasses_BreakdownIntrinsic}
6869
${IGC_BUILD__SRC__OpenCLPasses_CorrectlyRoundedDivSqrt}
70+
${IGC_BUILD__SRC__OpenCLPasses_Decompose2DBlockFuncs}
6971
${IGC_BUILD__SRC__OpenCLPasses_DeviceEnqueueFuncs}
7072
${IGC_BUILD__SRC__OpenCLPasses_DisableLoopUnrollOnRetry}
7173
${IGC_BUILD__SRC__OpenCLPasses_DpasFuncs}
@@ -137,6 +139,7 @@ set(IGC_BUILD__HDR__Optimizer_OpenCLPasses_All
137139
${IGC_BUILD__HDR__OpenCLPasses_KernelFunctionCloning}
138140
${IGC_BUILD__HDR__OpenCLPasses_BufferBoundsChecking}
139141
${IGC_BUILD__HDR__OpenCLPasses_LSCFuncs}
142+
${IGC_BUILD__HDR__OpenCLPasses_IOBlock2DFuncs}
140143
${IGC_BUILD__HDR__OpenCLPasses_LocalBuffers}
141144
${IGC_BUILD__HDR__OpenCLPasses_NamedBarriers}
142145
${IGC_BUILD__HDR__OpenCLPasses_NontemporalLoadsAndStoresInAssert}
@@ -192,6 +195,7 @@ set(IGC_BUILD_Compiler_OpenCLPasses_Groups
192195
Compiler__OpenCLPasses_KernelFunctionCloning
193196
Compiler__OpenCLPasses_BufferBoundsChecking
194197
Compiler__OpenCLPasses_LSCFuncs
198+
Compiler__OpenCLPasses_IOBlock2DFuncs
195199
Compiler__OpenCLPasses_LocalBuffers
196200
Compiler__OpenCLPasses_NamedBarriers
197201
Compiler__OpenCLPasses_NontemporalLoadsAndStoresInAssert
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#=========================== begin_copyright_notice ============================
2+
#
3+
# Copyright (C) 2019-2021 Intel Corporation
4+
#
5+
# SPDX-License-Identifier: MIT
6+
#
7+
#============================ end_copyright_notice =============================
8+
9+
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
10+
11+
12+
set(IGC_BUILD__SRC__Decompose2DBlockFuncs
13+
"${CMAKE_CURRENT_SOURCE_DIR}/Decompose2DBlockFuncs.cpp"
14+
)
15+
set(IGC_BUILD__SRC__OpenCLPasses_Decompose2DBlockFuncs ${IGC_BUILD__SRC__Decompose2DBlockFuncs} PARENT_SCOPE)
16+
17+
set(IGC_BUILD__HDR__Decompose2DBlockFuncs
18+
"${CMAKE_CURRENT_SOURCE_DIR}/Decompose2DBlockFuncs.hpp"
19+
)
20+
set(IGC_BUILD__HDR__OpenCLPasses_Decompose2DBlockFuncs ${IGC_BUILD__HDR__Decompose2DBlockFuncs} PARENT_SCOPE)
21+
22+
23+
igc_sg_register(
24+
Compiler__OpenCLPasses_Decompose2DBlockFuncs
25+
"Decompose2DBlockFuncs"
26+
FILES
27+
${IGC_BUILD__SRC__Decompose2DBlockFuncs}
28+
${IGC_BUILD__HDR__Decompose2DBlockFuncs}
29+
)

0 commit comments

Comments
 (0)