Skip to content

Commit 1aca1a1

Browse files
scottp101gfxbot
authored and committed
Rewrite get_local_size() to get_enqueued_local_size() when -cl-uniform-work-group-size is specified. This, in combination with reqd_work_group_size, allows for compile-time-known values that can be further optimized (e.g., mul -> shl).
Change-Id: I70f34ae607fb229bbef6ef624d1931337d97c372
1 parent ed3137e commit 1aca1a1

File tree

8 files changed

+217
-27
lines changed

8 files changed

+217
-27
lines changed

IGC/AdaptorOCL/UnifyIROCL.cpp

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
100100
#include "Compiler/Legalizer/TypeLegalizerPass.h"
101101
#include "Compiler/Optimizer/OpenCLPasses/ClampLoopUnroll/ClampLoopUnroll.hpp"
102102
#include "Compiler/Optimizer/OpenCLPasses/Image3dToImage2darray/Image3dToImage2darray.hpp"
103+
#include "Compiler/Optimizer/OpenCLPasses/RewriteLocalSize/RewriteLocalSize.hpp"
103104
#include "Compiler/MetaDataApi/PurgeMetaDataUtils.hpp"
104105
#include "Compiler/MetaDataUtilsWrapper.h"
105106
#include "Compiler/SPIRMetaDataTranslation.h"
@@ -216,25 +217,32 @@ static void CommonOCLBasedPasses(
216217
//extracting OCL version major before SPIRMetadataTranslation pass deletes its metadata node
217218
const SPIRMD::SpirMetaDataUtils spirMDUtils(&(*pContext->getModule()));
218219
int OCLMajor = getOCLMajorVersion(spirMDUtils);
220+
221+
CompOptions &CompilerOpts = pContext->getModuleMetaData()->compOpt;
219222

220223
// check OpenCL build options
221-
assert((pContext->type == ShaderType::OPENCL_SHADER) && "Trying to use OCL common passes on non-OCL context");
222-
bool shouldForceCR = static_cast<OpenCLProgramContext*>(pContext)->m_Options.CorrectlyRoundedSqrt;
224+
bool shouldForceCR = pContext->m_Options.CorrectlyRoundedSqrt;
225+
226+
CompilerOpts.replaceGlobalOffsetsByZero =
227+
pContext->m_InternalOptions.replaceGlobalOffsetsByZero;
223228

224-
pContext->getModuleMetaData()->compOpt.replaceGlobalOffsetsByZero =
225-
static_cast<OpenCLProgramContext*>(pContext)->m_InternalOptions.replaceGlobalOffsetsByZero;
229+
CompilerOpts.SubgroupIndependentForwardProgressRequired =
230+
(pContext->m_Options.NoSubgroupIFP == false);
226231

227-
pContext->getModuleMetaData()->compOpt.SubgroupIndependentForwardProgressRequired =
228-
(static_cast<OpenCLProgramContext*>(pContext)->m_Options.NoSubgroupIFP == false);
232+
if (OCLMajor >= 2)
233+
{
234+
CompilerOpts.UniformWGS =
235+
pContext->m_Options.UniformWGS;
236+
}
229237

230-
pContext->getModuleMetaData()->compOpt.GreaterThan2GBBufferRequired =
231-
!static_cast<OpenCLProgramContext*>(pContext)->m_InternalOptions.Use32BitPtrArith;
238+
CompilerOpts.GreaterThan2GBBufferRequired =
239+
!pContext->m_InternalOptions.Use32BitPtrArith;
232240

233-
pContext->getModuleMetaData()->compOpt.GreaterThan4GBBufferRequired =
234-
static_cast<OpenCLProgramContext*>(pContext)->m_InternalOptions.IntelGreaterThan4GBBufferRequired;
241+
CompilerOpts.GreaterThan4GBBufferRequired =
242+
pContext->m_InternalOptions.IntelGreaterThan4GBBufferRequired;
235243

236-
pContext->getModuleMetaData()->compOpt.HasBufferOffsetArg =
237-
static_cast<OpenCLProgramContext*>(pContext)->m_InternalOptions.IntelHasBufferOffsetArg;
244+
CompilerOpts.HasBufferOffsetArg =
245+
pContext->m_InternalOptions.IntelHasBufferOffsetArg;
238246

239247
// right now we don't support any standard function in the code gen
240248
// maybe we want to support some at some point to take advantage of LLVM optimizations
@@ -277,7 +285,7 @@ static void CommonOCLBasedPasses(
277285
mpm.add(createBIFTransformsPass());
278286
}
279287

280-
if(static_cast<OpenCLProgramContext*>(pContext)->m_InternalOptions.KernelDebugEnable)
288+
if(pContext->m_InternalOptions.KernelDebugEnable)
281289
{
282290
IF_DEBUG_INFO(mpm.add(new ImplicitGlobalId());)
283291
}
@@ -297,17 +305,14 @@ static void CommonOCLBasedPasses(
297305
// OCL has built-ins so it always need to run inlining
298306
{
299307
mpm.add(createProcessFuncAttributesPass());
300-
if((pContext->m_instrTypes.hasSubroutines) || (pContext->type == ShaderType::OPENCL_SHADER))
308+
if (IGC_GET_FLAG_VALUE(FunctionControl) != FLAG_FCALL_FORCE_INLINE)
301309
{
302-
if (IGC_GET_FLAG_VALUE(FunctionControl) != FLAG_FCALL_FORCE_INLINE)
303-
{
304-
int Threshold = IGC_GET_FLAG_VALUE(OCLInlineThreshold);
305-
mpm.add(createFunctionInliningPass(Threshold));
306-
}
307-
else
308-
{
309-
mpm.add(createAlwaysInlinerLegacyPass());
310-
}
310+
int Threshold = IGC_GET_FLAG_VALUE(OCLInlineThreshold);
311+
mpm.add(createFunctionInliningPass(Threshold));
312+
}
313+
else
314+
{
315+
mpm.add(createAlwaysInlinerLegacyPass());
311316
}
312317
// The inliner sometimes fails to delete unused functions, this cleans up the remaining mess.
313318
mpm.add(createGlobalDCEPass());
@@ -342,6 +347,8 @@ static void CommonOCLBasedPasses(
342347
mpm.add(new BreakConstantExpr());
343348
}
344349

350+
if (CompilerOpts.UniformWGS)
351+
mpm.add(new RewriteLocalSize());
345352

346353
mpm.add(CreateFoldKnownWorkGroupSizes());
347354

@@ -430,7 +437,7 @@ static void CommonOCLBasedPasses(
430437
mpm.add(new SetFastMathFlags());
431438
mpm.add(new FixResourcePtr());
432439

433-
bool isOptDisabled = pContext->getModuleMetaData()->compOpt.OptDisable;
440+
bool isOptDisabled = CompilerOpts.OptDisable;
434441
if(isOptDisabled)
435442
{
436443
// Run additional predefined constant resolving when optimization is

IGC/Compiler/CISACodeGen/FoldKnownWorkGroupSizes.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2424
2525
======================= end_copyright_notice ==================================*/
2626

27+
#include "Compiler/Optimizer/OpenCLPasses/WIFuncs/WIFuncsAnalysis.hpp"
2728
#include "FoldKnownWorkGroupSizes.h"
2829
#include "../IGCPassSupport.h"
2930
#include "../CodeGenPublic.h"
@@ -82,16 +83,19 @@ void FoldKnownWorkGroupSizes::visitCallInst(llvm::CallInst &I)
8283
CodeGenContext* ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
8384

8485

85-
if (funcName.equals("__builtin_IB_get_global_offset") && ctx->getModuleMetaData()->compOpt.replaceGlobalOffsetsByZero)
86+
if (funcName.equals(WIFuncsAnalysis::GET_GLOBAL_OFFSET) &&
87+
ctx->getModuleMetaData()->compOpt.replaceGlobalOffsetsByZero)
8688
{
8789
if (calledFunction->getReturnType() == Type::getInt32Ty(module->getContext()))
8890
{
8991
ConstantInt* IntZero = ConstantInt::get(Type::getInt32Ty(module->getContext()), 0);
9092
I.replaceAllUsesWith(IntZero);
93+
// TODO: erase when patch token is not required
94+
//I.eraseFromParent();
9195
m_changed = true;
9296
}
9397
}
94-
else if (funcName.equals("__builtin_IB_get_enqueued_local_size"))
98+
else if (funcName.equals(WIFuncsAnalysis::GET_ENQUEUED_LOCAL_SIZE))
9599
{
96100
auto itr = ctx->getMetaDataUtils()->findFunctionsInfoItem(I.getFunction());
97101

@@ -119,6 +123,8 @@ void FoldKnownWorkGroupSizes::visitCallInst(llvm::CallInst &I)
119123
auto *EE = IRB.CreateExtractElement(CV, Dim, "enqueuedLocalSize");
120124

121125
I.replaceAllUsesWith(EE);
126+
// TODO: erase when patch token is not required
127+
//I.eraseFromParent();
122128
m_changed = true;
123129
}
124130
}

IGC/Compiler/CodeGenPublic.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -975,7 +975,8 @@ namespace IGC
975975
public:
976976
Options(const TC::STB_TranslateInputArgs* pInputArgs) :
977977
CorrectlyRoundedSqrt(false),
978-
NoSubgroupIFP(false)
978+
NoSubgroupIFP(false),
979+
UniformWGS(false)
979980
{
980981
if (pInputArgs == nullptr)
981982
return;
@@ -994,10 +995,18 @@ namespace IGC
994995
NoSubgroupIFP = true;
995996
}
996997

998+
if (strstr(options, "-cl-uniform-work-group-size"))
999+
{
1000+
// Note that this is only available for -cl-std >= 2.0.
1001+
// This will be checked before we place this into the
1002+
// module metadata.
1003+
UniformWGS = true;
1004+
}
9971005
}
9981006

9991007
bool CorrectlyRoundedSqrt;
10001008
bool NoSubgroupIFP;
1009+
bool UniformWGS;
10011010
};
10021011

10031012
// output: shader information

IGC/Compiler/Optimizer/OpenCLPasses/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ add_subdirectory(WGFuncs)
2727
add_subdirectory(WIFuncs)
2828
add_subdirectory(ClampLoopUnroll)
2929
add_subdirectory(Image3dToImage2darray)
30+
add_subdirectory(RewriteLocalSize)
3031

3132

3233
#
@@ -68,6 +69,7 @@ set(IGC_BUILD__SRC__Optimizer_OpenCLPasses_All
6869
${IGC_BUILD__SRC__OpenCLPasses_WIFuncs}
6970
${IGC_BUILD__SRC__OpenCLPasses_ClampLoopUnroll}
7071
${IGC_BUILD__SRC__OpenCLPasses_Image3dToImage2darray}
72+
${IGC_BUILD__SRC__OpenCLPasses_RewriteLocalSize}
7173
)
7274

7375

@@ -112,6 +114,7 @@ set(IGC_BUILD__HDR__Optimizer_OpenCLPasses_All
112114
${IGC_BUILD__HDR__OpenCLPasses_WIFuncs}
113115
${IGC_BUILD__HDR__OpenCLPasses_ClampLoopUnroll}
114116
${IGC_BUILD__HDR__OpenCLPasses_Image3dToImage2darray}
117+
${IGC_BUILD__HDR__OpenCLPasses_RewriteLocalSize}
115118
)
116119

117120

@@ -150,6 +153,7 @@ set(IGC_BUILD_Compiler_OpenCLPasses_Groups
150153
Compiler__OpenCLPasses_WIFuncs
151154
Compiler__OpenCLPasses_ClampLoopUnroll
152155
Compiler__OpenCLPasses_Image3dToImage2darray
156+
Compiler__OpenCLPasses_RewriteLocalSize
153157
)
154158

155159

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
2+
3+
4+
set(IGC_BUILD__SRC__RewriteLocalSize
5+
"${CMAKE_CURRENT_SOURCE_DIR}/RewriteLocalSize.cpp"
6+
)
7+
set(IGC_BUILD__SRC__OpenCLPasses_RewriteLocalSize ${IGC_BUILD__SRC__RewriteLocalSize} PARENT_SCOPE)
8+
9+
set(IGC_BUILD__HDR__RewriteLocalSize
10+
"${CMAKE_CURRENT_SOURCE_DIR}/RewriteLocalSize.hpp"
11+
)
12+
set(IGC_BUILD__HDR__OpenCLPasses_RewriteLocalSize ${IGC_BUILD__HDR__RewriteLocalSize} PARENT_SCOPE)
13+
14+
15+
igc_sg_register(
16+
Compiler__OpenCLPasses_RewriteLocalSize
17+
"RewriteLocalSize"
18+
FILES
19+
${IGC_BUILD__SRC__RewriteLocalSize}
20+
${IGC_BUILD__HDR__RewriteLocalSize}
21+
)
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*===================== begin_copyright_notice ==================================
2+
3+
Copyright (c) 2017 Intel Corporation
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a
6+
copy of this software and associated documentation files (the
7+
"Software"), to deal in the Software without restriction, including
8+
without limitation the rights to use, copy, modify, merge, publish,
9+
distribute, sublicense, and/or sell copies of the Software, and to
10+
permit persons to whom the Software is furnished to do so, subject to
11+
the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included
14+
in all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17+
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23+
24+
25+
======================= end_copyright_notice ==================================*/
26+
//
27+
// As per the OCL 2.0 spec for get_enqueued_local_size:
28+
//
29+
// "Returns the same value as that returned by get_local_size(dimindx) if
30+
// the kernel is executed
31+
// with a uniform work-group size."
32+
//
33+
// This pass is only invoked when -cl-uniform-work-group-size is present.
34+
// In that case, get_local_size(x) == get_enqueued_local_size(x).
35+
//
36+
// So we will rewrite all of the get_local_size(x) so that we only have
37+
// get_enqueued_local_size(x). Those calls may further be folded for kernels
38+
// that utilize __attribute__((reqd_work_group_size(X,Y,Z))).
39+
//
40+
41+
#include "Compiler/Optimizer/OpenCLPasses/RewriteLocalSize/RewriteLocalSize.hpp"
42+
#include "Compiler/Optimizer/OpenCLPasses/WIFuncs/WIFuncsAnalysis.hpp"
43+
#include "Compiler/IGCPassSupport.h"
44+
45+
#include "common/LLVMWarningsPush.hpp"
46+
#include <llvmWrapper/IR/Function.h>
47+
#include <llvm/IR/Instructions.h>
48+
#include "common/LLVMWarningsPop.hpp"
49+
50+
using namespace llvm;
51+
using namespace IGC;
52+
53+
// Register pass to igc-opt
54+
#define PASS_FLAG "igc-rewrite-local-size"
55+
#define PASS_DESCRIPTION "converts get_local_size() to get_enqueued_local_size()"
56+
#define PASS_CFG_ONLY false
57+
#define PASS_ANALYSIS false
58+
IGC_INITIALIZE_PASS_BEGIN(RewriteLocalSize, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
59+
IGC_INITIALIZE_PASS_END(RewriteLocalSize, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
60+
61+
char RewriteLocalSize::ID = 0;
62+
63+
// Constructs the pass as a ModulePass identified by RewriteLocalSize::ID and
// runs the pass initializer against the global LLVM pass registry.
RewriteLocalSize::RewriteLocalSize() : ModulePass(ID)
64+
{
65+
initializeRewriteLocalSizePass(*PassRegistry::getPassRegistry());
66+
}
67+
68+
// Redirects the function named by WIFuncsAnalysis::GET_LOCAL_SIZE to the one
// named by WIFuncsAnalysis::GET_ENQUEUED_LOCAL_SIZE.
//
// Returns false (module unchanged) when the module contains no function named
// GET_LOCAL_SIZE; returns true otherwise.
bool RewriteLocalSize::runOnModule(Module &M)
69+
{
70+
// Look up the local-size function; if the module does not have it there is
// nothing to rewrite.
Function *LS = M.getFunction(WIFuncsAnalysis::GET_LOCAL_SIZE);
71+
if (!LS)
72+
return false;
73+
74+
Function *ELS = M.getFunction(WIFuncsAnalysis::GET_ENQUEUED_LOCAL_SIZE);
75+
// No enqueued-local-size function in the module yet: rename LS in place so
// that everything already referring to it now refers to the enqueued name.
if (!ELS)
76+
LS->setName(WIFuncsAnalysis::GET_ENQUEUED_LOCAL_SIZE);
77+
// Both functions exist: point every use of LS at ELS. LS itself is not
// erased here.
// NOTE(review): replaceAllUsesWith presumably requires LS and ELS to have
// the same type -- confirm both builtins share a signature.
else
78+
LS->replaceAllUsesWith(ELS);
79+
80+
return true;
81+
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*===================== begin_copyright_notice ==================================
2+
3+
Copyright (c) 2017 Intel Corporation
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a
6+
copy of this software and associated documentation files (the
7+
"Software"), to deal in the Software without restriction, including
8+
without limitation the rights to use, copy, modify, merge, publish,
9+
distribute, sublicense, and/or sell copies of the Software, and to
10+
permit persons to whom the Software is furnished to do so, subject to
11+
the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included
14+
in all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17+
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23+
24+
25+
======================= end_copyright_notice ==================================*/
26+
#pragma once
27+
28+
#include "common/LLVMWarningsPush.hpp"
29+
#include <llvm/Pass.h>
30+
#include "common/LLVMWarningsPop.hpp"
31+
32+
namespace IGC
33+
{
34+
/// @brief Module pass that converts get_local_size() to
///        get_enqueued_local_size().
///
/// The pass manager setup in this commit adds the pass only when the
/// UniformWGS compiler option is set (-cl-uniform-work-group-size).
class RewriteLocalSize : public llvm::ModulePass
35+
{
36+
public:
37+
// Pass identification, replacement for typeid
38+
static char ID;
39+
40+
/// @brief Constructor
41+
RewriteLocalSize();
42+
43+
/// @brief Destructor
44+
~RewriteLocalSize() {}
45+
46+
/// @brief Provides name of pass
47+
llvm::StringRef getPassName() const override
48+
{
49+
return "RewriteLocalSize";
50+
}
51+
52+
/// @brief Declares that this pass preserves the CFG.
/// @param AU The analysis usage record to fill in.
void getAnalysisUsage(llvm::AnalysisUsage &AU) const override
53+
{
54+
AU.setPreservesCFG();
55+
}
56+
57+
/// @brief Entry point of the pass; defined in RewriteLocalSize.cpp.
/// @param M The module to process.
/// @return Per the llvm::ModulePass contract, true if the module was
///         modified.
bool runOnModule(llvm::Module &M) override;
58+
};
59+
60+
} // namespace IGC
61+

IGC/common/MDFrameWork.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ namespace IGC
198198
bool replaceGlobalOffsetsByZero = false;
199199
unsigned forcePixelShaderSIMDMode = 0;
200200
bool pixelShaderDoNotAbortOnSpill = false;
201+
bool UniformWGS = false;
201202
};
202203

203204
struct ComputeShaderInfo

0 commit comments

Comments
 (0)