Skip to content

Commit 706d81d

Browse files
tmpark authored and igcbot committed
Add an OCL option to enable profile-guided trimming
Add the OCL option -ze-opt-static-profile-guided-trimming (OpenCL spelling: -cl-intel-static-profile-guided-trimming) to enable static profile-guided trimming.
1 parent b50c4cc commit 706d81d

File tree

8 files changed

+276
-66
lines changed

8 files changed

+276
-66
lines changed

IGC/AdaptorOCL/UnifyIROCL.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -425,8 +425,10 @@ static void CommonOCLBasedPasses(OpenCLProgramContext* pContext)
425425
mpm.add(createSCCPPass());
426426
mpm.add(new ResolveConstExprCalls());
427427

428-
// Estimate maximal function size in the module and disable subroutine if not profitable.
429-
mpm.add(createEstimateFunctionSizePass());
428+
// Estimate maximal function size in the module and disable subroutine
429+
// if not profitable.
430+
mpm.add(createEstimateFunctionSizePass(
431+
pContext->m_Options.StaticProfileGuidedTrimming));
430432
mpm.add(createProcessFuncAttributesPass());
431433
FastMathFlags Mask;
432434
Mask.setFast();

IGC/Compiler/CISACodeGen/EstimateFunctionSize.cpp

Lines changed: 158 additions & 62 deletions
Large diffs are not rendered by default.

IGC/Compiler/CISACodeGen/EstimateFunctionSize.h

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ namespace IGC {
4242
AL_Kernel
4343
};
4444

45-
explicit EstimateFunctionSize(AnalysisLevel = AL_Module);
45+
explicit EstimateFunctionSize(AnalysisLevel = AL_Module, bool = false);
4646
~EstimateFunctionSize();
4747
EstimateFunctionSize(const EstimateFunctionSize&) = delete;
4848
EstimateFunctionSize& operator=(const EstimateFunctionSize&) = delete;
@@ -137,9 +137,45 @@ namespace IGC {
137137
llvm::ScaledNumber<uint64_t> thresholdForTrimming;
138138
std::unordered_map<llvm::Loop *, llvm::ScaledNumber<uint64_t>>
139139
LoopIterCnts;
140+
141+
// Flags for Kernel trimming
142+
bool ControlKernelTotalSize;
143+
bool ControlUnitSize;
144+
unsigned ControlInlineTinySize;
145+
unsigned UnitSizeThreshold;
146+
147+
// Flags for Static Profile-guided trimming
148+
bool StaticProfileGuidedTrimming;
149+
bool UseFrequencyInfoForSPGT;
150+
bool BlockFrequencySampling;
151+
bool EnableLeafCollapsing;
152+
bool EnableSizeContributionOptimization;
153+
bool LoopCountAwareTrimming;
154+
bool EnableGreedyTrimming;
155+
unsigned SizeWeightForSPGT;
156+
unsigned FrequencyWeightForSPGT;
157+
unsigned MetricForKernelSizeReduction;
158+
unsigned ParameterForColdFuncThreshold;
159+
unsigned ControlInlineTinySizeForSPGT;
160+
unsigned MaxUnrollCountForFunctionSizeAnalysis;
161+
unsigned SkipTrimmingOneCopyFunction;
162+
std::string SelectiveTrimming;
163+
164+
// Flags for Partitioning
165+
bool PartitionUnit;
166+
bool StaticProfileGuidedPartitioning;
167+
168+
// Flags for implicit arguments and external functions
169+
bool ForceInlineExternalFunctions;
170+
bool ForceInlineStackCallWithImplArg;
171+
bool ControlInlineImplicitArgs;
172+
unsigned SubroutineThreshold;
173+
unsigned KernelTotalSizeThreshold;
174+
unsigned ExpandedUnitSizeThreshold;
140175
};
141176

142177
llvm::ModulePass* createEstimateFunctionSizePass();
178+
llvm::ModulePass *createEstimateFunctionSizePass(bool);
143179
llvm::ModulePass* createEstimateFunctionSizePass(EstimateFunctionSize::AnalysisLevel);
144180

145181
} // namespace IGC

IGC/Compiler/CISACodeGen/OpenCLOptions.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,5 +603,9 @@ void Options::parseOptions(const char* opts)
603603
Xfinalizer = true;
604604
XfinalizerOption = arg->getValue();
605605
}
606+
607+
if (apiOptions.hasArg(OPT_static_profile_guided_trimming_common)) {
608+
StaticProfileGuidedTrimming = true;
609+
}
606610
}
607611
} // namespace IGC

IGC/Compiler/CISACodeGen/OpenCLOptions.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,8 @@ class Options
191191
// This option enables FP64 emulation for conversions
192192
// This applies to platforms that have no HW support for double operations
193193
bool EnableFP64GenConvEmu = false;
194+
// This option enables static profile-guided trimming
195+
bool StaticProfileGuidedTrimming = false;
194196

195197
private:
196198
void parseOptions(const char* opts);

IGC/OCLFE/igd_fcl_mcl/source/clang_tb.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1374,7 +1374,9 @@ namespace TC
13741374
(strcmp(pParam, "-ze-fp64-gen-emu") == 0) || //used by fp64 emulation
13751375
(strcmp(pParam, "-cl-fp64-gen-conv-emu") == 0) || //used by fp64 conversion emulation
13761376
(strcmp(pParam, "-ze-fp64-gen-conv-emu") == 0) || //used by fp64 conversion emulation
1377-
(strcmp(pParam, "-Xfinalizer") == 0); // used to pass options to visa finalizer
1377+
(strcmp(pParam, "-Xfinalizer") == 0) || // used to pass options to visa finalizer
1378+
(strcmp(pParam, "-cl-intel-static-profile-guided-trimming") == 0) || //used to enable profile-guided trimming
1379+
(strcmp(pParam, "-ze-opt-static-profile-guided-trimming") == 0); //used to enable profile-guided trimming
13781380

13791381
if (isCommonOption)
13801382
{

IGC/Options/include/igc/Options/IGCApiOptions.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ defm fp64_gen_emu : CommonFlag<"fp64-gen-emu">;
5858
// -cl-fp64-gen-conv-emu -ze-fp64-gen-conv-emu
5959
defm fp64_gen_conv_emu : CommonFlag<"fp64-gen-conv-emu">;
6060

61+
// -cl-intel-static-profile-guided-trimming, -ze-opt-static-profile-guided-trimming
62+
defm static_profile_guided_trimming : CommonFlag<"static-profile-guided-trimming">;
63+
6164
// }} Backend API options
6265

6366
// API options from source translation {{
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2024 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
// REQUIRES: regkeys, dg2-supported
10+
11+
// RUN: ocloc compile -file %s -device dg2 -options "-igc_opts 'SubroutineThreshold=1,KernelTotalSizeThreshold=1,ControlInlineTinySize=1,PrintControlKernelTotalSize=15'" 2>&1 | FileCheck %s --check-prefix=CHECK-DEFAULT
12+
// RUN: ocloc compile -file %s -device dg2 -options "-ze-opt-static-profile-guided-trimming -igc_opts 'SubroutineThreshold=1,KernelTotalSizeThreshold=1,ControlInlineTinySize=1,ControlInlineTinySizeForSPGT=150,PrintControlKernelTotalSize=15'" 2>&1 | FileCheck %s --check-prefix=CHECK-SPGT
13+
14+
// CHECK-DEFAULT: Good to trim (Big enough > 1), bar3, Function Attribute: Best effort innline, Function size: 13, Freq: 0.0
15+
// CHECK-SPGT: Can't trim (Low weight < 0.03218650818), bar3, Function Attribute: Best effort innline, Function size: 163, Size after collapsing: 163, Size contribution: 652, Freq: 327680.0, Weight: 0.002581326365
16+
17+
int bar3(__global int *c) {
18+
int k = 10;
19+
for (int i = 0 ; i < 100 ; i++) {
20+
*c += k * i;
21+
}
22+
return k;
23+
}
24+
25+
int bar2(__global int *b) {
26+
int k = 10;
27+
for (int i = 0 ; i < 100 ; i++) {
28+
*b += k * bar3(b);
29+
}
30+
return k;
31+
}
32+
33+
int bar1(__global int *a) {
34+
int k = 10;
35+
for (int i = 0 ; i < 100 ; i++) {
36+
*a += k * bar2(a);
37+
*a += k * bar3(a);
38+
}
39+
return k;
40+
}
41+
42+
__kernel void foo(int __global *p) {
43+
int a = 0;
44+
for (int i = 0; i < 100; i++) {
45+
a += bar1(p);
46+
a += bar2(p);
47+
a += bar3(p);
48+
}
49+
for (int i = 300; i < 500000; i++) {
50+
a += *p;
51+
}
52+
for (int i = 300; i < 500000; i++) {
53+
a += *p;
54+
}
55+
for (int i = 300; i < 500000; i++) {
56+
a += *p;
57+
}
58+
for (int i = 300; i < 500000; i++) {
59+
a += *p;
60+
}
61+
for (int i = 300; i < 500000; i++) {
62+
a += *p;
63+
}
64+
*p = a;
65+
}

0 commit comments

Comments
 (0)