@@ -46,12 +46,12 @@ char &llvm::AMDGPUResourceUsageAnalysisID = AMDGPUResourceUsageAnalysis::ID;
46
46
// In code object v4 and older, we need to tell the runtime some amount ahead of
47
47
// time if we don't know the true stack size. Assume a smaller number if this is
48
48
// only due to dynamic / non-entry block allocas.
49
// Hidden uint32_t command-line option giving the stack use, in bytes, that is
// assumed for any external call; it is initialized to 16384.
// This hunk renames the option object with a `cl` prefix. The unprefixed name
// is reused in runOnModule for a local that is initialized from this option
// and then passed on to analyzeResourceUsage.
- static cl::opt<uint32_t > AssumedStackSizeForExternalCall (
49
+ static cl::opt<uint32_t > clAssumedStackSizeForExternalCall (
50
50
" amdgpu-assume-external-call-stack-size" ,
51
51
cl::desc (" Assumed stack use of any external call (in bytes)" ), cl::Hidden,
52
52
cl::init(16384 ));
53
53
54
- static cl::opt<uint32_t > AssumedStackSizeForDynamicSizeObjects (
54
+ static cl::opt<uint32_t > clAssumedStackSizeForDynamicSizeObjects (
55
55
" amdgpu-assume-dynamic-stack-object-size" ,
56
56
cl::desc (" Assumed extra stack use if there are any "
57
57
" variable sized objects (in bytes)" ),
@@ -112,11 +112,14 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
112
112
113
113
// By default, for code object v5 and later, track only the minimum scratch
114
114
// size
115
+ uint32_t AssumedStackSizeForDynamicSizeObjects =
116
+ clAssumedStackSizeForDynamicSizeObjects;
117
+ uint32_t AssumedStackSizeForExternalCall = clAssumedStackSizeForExternalCall;
115
118
if (AMDGPU::getAMDHSACodeObjectVersion (M) >= AMDGPU::AMDHSA_COV5 ||
116
119
STI.getTargetTriple ().getOS () == Triple::AMDPAL) {
117
- if (!AssumedStackSizeForDynamicSizeObjects .getNumOccurrences ())
120
+ if (clAssumedStackSizeForDynamicSizeObjects .getNumOccurrences () == 0 )
118
121
AssumedStackSizeForDynamicSizeObjects = 0 ;
119
- if (!AssumedStackSizeForExternalCall .getNumOccurrences ())
122
+ if (clAssumedStackSizeForExternalCall .getNumOccurrences () == 0 )
120
123
AssumedStackSizeForExternalCall = 0 ;
121
124
}
122
125
@@ -132,7 +135,8 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
132
135
CallGraphResourceInfo.insert (std::pair (F, SIFunctionResourceInfo ()));
133
136
SIFunctionResourceInfo &Info = CI.first ->second ;
134
137
assert (CI.second && " should only be called once per function" );
135
- Info = analyzeResourceUsage (*MF, TM);
138
+ Info = analyzeResourceUsage (*MF, TM, AssumedStackSizeForDynamicSizeObjects,
139
+ AssumedStackSizeForExternalCall);
136
140
HasIndirectCall |= Info.HasIndirectCall ;
137
141
}
138
142
@@ -152,7 +156,8 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
152
156
SIFunctionResourceInfo &Info = CI.first ->second ;
153
157
MachineFunction *MF = MMI.getMachineFunction (*F);
154
158
assert (MF && " function must have been generated already" );
155
- Info = analyzeResourceUsage (*MF, TM);
159
+ Info = analyzeResourceUsage (*MF, TM, AssumedStackSizeForDynamicSizeObjects,
160
+ AssumedStackSizeForExternalCall);
156
161
HasIndirectCall |= Info.HasIndirectCall ;
157
162
}
158
163
@@ -164,7 +169,9 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
164
169
165
170
AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo
166
171
AMDGPUResourceUsageAnalysis::analyzeResourceUsage (
167
- const MachineFunction &MF, const TargetMachine &TM) const {
172
+ const MachineFunction &MF, const TargetMachine &TM,
173
+ uint32_t AssumedStackSizeForDynamicSizeObjects,
174
+ uint32_t AssumedStackSizeForExternalCall) const {
168
175
SIFunctionResourceInfo Info;
169
176
170
177
const SIMachineFunctionInfo *MFI = MF.getInfo <SIMachineFunctionInfo>();
@@ -541,9 +548,9 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
541
548
// directly call the tail called function. If a kernel directly
542
549
// calls a tail recursive function, we'll assume maximum stack size
543
550
// based on the regular call instruction.
544
- CalleeFrameSize =
545
- std::max ( CalleeFrameSize,
546
- static_cast <uint64_t >(AssumedStackSizeForExternalCall));
551
+ CalleeFrameSize = std::max (
552
+ CalleeFrameSize,
553
+ static_cast <uint64_t >(AssumedStackSizeForExternalCall));
547
554
}
548
555
}
549
556
0 commit comments