@@ -46,12 +46,12 @@ char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;
 // In code object v4 and older, we need to tell the runtime some amount ahead of
 // time if we don't know the true stack size. Assume a smaller number if this is
 // only due to dynamic / non-entry block allocas.
-static cl::opt<uint32_t> AssumedStackSizeForExternalCall(
+static cl::opt<uint32_t> clAssumedStackSizeForExternalCall(
     "amdgpu-assume-external-call-stack-size",
     cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden,
     cl::init(16384));
 
-static cl::opt<uint32_t> AssumedStackSizeForDynamicSizeObjects(
+static cl::opt<uint32_t> clAssumedStackSizeForDynamicSizeObjects(
     "amdgpu-assume-dynamic-stack-object-size",
     cl::desc("Assumed extra stack use if there are any "
              "variable sized objects (in bytes)"),
@@ -112,11 +112,15 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
 
   // By default, for code object v5 and later, track only the minimum scratch
   // size
+  uint32_t AssumedStackSizeForDynamicSizeObjects =
+      clAssumedStackSizeForDynamicSizeObjects.getValue();
+  uint32_t AssumedStackSizeForExternalCall =
+      clAssumedStackSizeForExternalCall.getValue();
   if (AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
       STI.getTargetTriple().getOS() == Triple::AMDPAL) {
-    if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
+    if (!clAssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
       AssumedStackSizeForDynamicSizeObjects = 0;
-    if (!AssumedStackSizeForExternalCall.getNumOccurrences())
+    if (!clAssumedStackSizeForExternalCall.getNumOccurrences())
       AssumedStackSizeForExternalCall = 0;
   }
 
@@ -132,7 +136,8 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
         CallGraphResourceInfo.insert(std::pair(F, SIFunctionResourceInfo()));
     SIFunctionResourceInfo &Info = CI.first->second;
     assert(CI.second && "should only be called once per function");
-    Info = analyzeResourceUsage(*MF, TM);
+    Info = analyzeResourceUsage(*MF, TM, AssumedStackSizeForDynamicSizeObjects,
+                                AssumedStackSizeForExternalCall);
     HasIndirectCall |= Info.HasIndirectCall;
   }
 
@@ -152,7 +157,8 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
     SIFunctionResourceInfo &Info = CI.first->second;
     MachineFunction *MF = MMI.getMachineFunction(*F);
     assert(MF && "function must have been generated already");
-    Info = analyzeResourceUsage(*MF, TM);
+    Info = analyzeResourceUsage(*MF, TM, AssumedStackSizeForDynamicSizeObjects,
+                                AssumedStackSizeForExternalCall);
     HasIndirectCall |= Info.HasIndirectCall;
   }
 
@@ -164,7 +170,9 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
 
 AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
 AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
-    const MachineFunction &MF, const TargetMachine &TM) const {
+    const MachineFunction &MF, const TargetMachine &TM,
+    uint32_t AssumedStackSizeForDynamicSizeObjects,
+    uint32_t AssumedStackSizeForExternalCall) const {
   SIFunctionResourceInfo Info;
 
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -541,9 +549,9 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
             // directly call the tail called function. If a kernel directly
             // calls a tail recursive function, we'll assume maximum stack size
             // based on the regular call instruction.
-            CalleeFrameSize =
-                std::max(CalleeFrameSize,
-                         static_cast<uint64_t>(AssumedStackSizeForExternalCall));
+            CalleeFrameSize = std::max(
+                CalleeFrameSize,
+                static_cast<uint64_t>(AssumedStackSizeForExternalCall));
           }
         }
 
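
For illustration only, and not part of the patch: a minimal standalone sketch of the cl::opt-to-parameter pattern the change applies, using hypothetical option and function names. The option value is snapshotted into a local once, reset to 0 unless the flag was given explicitly, and then passed as an explicit argument so the analysis-style routine no longer reads global command-line state.

// Hypothetical example; option and function names are illustrative, not LLVM's.
#include "llvm/Support/CommandLine.h"
#include <algorithm>
#include <cstdint>
#include <cstdio>

using namespace llvm;

static cl::opt<uint32_t>
    clAssumedSize("assumed-size",
                  cl::desc("Assumed size when the real one is unknown (bytes)"),
                  cl::Hidden, cl::init(16384));

// Takes the assumed size as a parameter instead of reading the global option.
static uint64_t analyze(uint64_t KnownSize, uint32_t AssumedSize) {
  return std::max(KnownSize, static_cast<uint64_t>(AssumedSize));
}

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);

  // Snapshot the option once; drop the assumption to 0 unless the user passed
  // -assumed-size explicitly (mirrors the getNumOccurrences() check above).
  uint32_t AssumedSize = clAssumedSize.getValue();
  bool NewDefaultsApply = true; // stands in for the code object v5 / AMDPAL check
  if (NewDefaultsApply && !clAssumedSize.getNumOccurrences())
    AssumedSize = 0;

  std::printf("%llu\n", static_cast<unsigned long long>(
                            analyze(/*KnownSize=*/0, AssumedSize)));
  return 0;
}

Passing the snapshotted values as parameters keeps analyzeResourceUsage independent of global option state, which is what the cl prefix on the remaining file-scope options signals.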