74
74
75
75
using namespace llvm ;
76
76
using namespace llvm ::PatternMatch;
77
+ using namespace llvm ::AMDGPU;
77
78
78
79
namespace {
79
80
class SGPRRegisterRegAlloc : public RegisterRegAllocBase <SGPRRegisterRegAlloc> {
@@ -186,109 +187,95 @@ static VGPRRegisterRegAlloc fastRegAllocVGPR(
186
187
" fast" , " fast register allocator" , createFastVGPRRegisterAllocator);
187
188
} // anonymous namespace
188
189
189
- static cl::opt< bool >
190
- EnableEarlyIfConversion (" amdgpu-early-ifcvt" , cl::Hidden,
191
- cl::desc (" Run early if-conversion" ),
192
- cl::init(false ));
190
+ namespace llvm ::AMDGPU {
191
+ cl::opt< bool > EnableEarlyIfConversion (" amdgpu-early-ifcvt" , cl::Hidden,
192
+ cl::desc (" Run early if-conversion" ),
193
+ cl::init(false ));
193
194
194
- static cl::opt<bool >
195
- OptExecMaskPreRA (" amdgpu-opt-exec-mask-pre-ra" , cl::Hidden,
196
- cl::desc (" Run pre-RA exec mask optimizations" ),
197
- cl::init(true ));
195
+ cl::opt<bool > OptExecMaskPreRA (" amdgpu-opt-exec-mask-pre-ra" , cl::Hidden,
196
+ cl::desc (" Run pre-RA exec mask optimizations" ),
197
+ cl::init(true ));
198
198
199
- static cl::opt<bool >
199
+ cl::opt<bool >
200
200
LowerCtorDtor (" amdgpu-lower-global-ctor-dtor" ,
201
201
cl::desc (" Lower GPU ctor / dtors to globals on the device." ),
202
202
cl::init(true ), cl::Hidden);
203
203
204
204
// Option to disable the load/store vectorizer for tests.
205
- static cl::opt<bool > EnableLoadStoreVectorizer (
206
- " amdgpu-load-store-vectorizer" ,
207
- cl::desc (" Enable load store vectorizer" ),
208
- cl::init(true ),
209
- cl::Hidden);
205
+ cl::opt<bool >
206
+ EnableLoadStoreVectorizer (" amdgpu-load-store-vectorizer" ,
207
+ cl::desc (" Enable load store vectorizer" ),
208
+ cl::init(true ), cl::Hidden);
210
209
211
210
// Option to control scalarization of global loads.
212
- static cl::opt<bool > ScalarizeGlobal (
213
- " amdgpu-scalarize-global-loads" ,
214
- cl::desc (" Enable global load scalarization" ),
215
- cl::init(true ),
216
- cl::Hidden);
211
+ cl::opt<bool > ScalarizeGlobal (" amdgpu-scalarize-global-loads" ,
212
+ cl::desc (" Enable global load scalarization" ),
213
+ cl::init(true ), cl::Hidden);
217
214
218
215
// Option to run the internalize pass.
219
- static cl::opt<bool > InternalizeSymbols (
220
- " amdgpu-internalize-symbols" ,
221
- cl::desc (" Enable elimination of non-kernel functions and unused globals" ),
222
- cl::init(false ),
223
- cl::Hidden);
216
+ cl::opt<bool > InternalizeSymbols (
217
+ " amdgpu-internalize-symbols" ,
218
+ cl::desc (" Enable elimination of non-kernel functions and unused globals" ),
219
+ cl::init(false ), cl::Hidden);
224
220
225
221
// Option to inline all functions early.
226
- static cl::opt<bool > EarlyInlineAll (
227
- " amdgpu-early-inline-all" ,
228
- cl::desc (" Inline all functions early" ),
229
- cl::init(false ),
230
- cl::Hidden);
222
+ cl::opt<bool > EarlyInlineAll (" amdgpu-early-inline-all" ,
223
+ cl::desc (" Inline all functions early" ),
224
+ cl::init(false ), cl::Hidden);
231
225
232
- static cl::opt<bool > RemoveIncompatibleFunctions (
226
+ cl::opt<bool > RemoveIncompatibleFunctions (
233
227
" amdgpu-enable-remove-incompatible-functions" , cl::Hidden,
234
228
cl::desc (" Enable removal of functions when they"
235
229
" use features not supported by the target GPU" ),
236
230
cl::init(true ));
237
231
238
- static cl::opt<bool > EnableSDWAPeephole (
239
- " amdgpu-sdwa-peephole" ,
240
- cl::desc (" Enable SDWA peepholer" ),
241
- cl::init(true ));
232
+ cl::opt<bool > EnableSDWAPeephole (" amdgpu-sdwa-peephole" ,
233
+ cl::desc (" Enable SDWA peepholer" ),
234
+ cl::init(true ));
242
235
243
- static cl::opt<bool > EnableDPPCombine (
244
- " amdgpu-dpp-combine" ,
245
- cl::desc (" Enable DPP combiner" ),
246
- cl::init(true ));
236
+ cl::opt<bool > EnableDPPCombine (" amdgpu-dpp-combine" ,
237
+ cl::desc (" Enable DPP combiner" ), cl::init(true ));
247
238
248
239
// Enable address-space-based alias analysis.
249
- static cl::opt<bool > EnableAMDGPUAliasAnalysis (" enable-amdgpu-aa" , cl::Hidden,
250
- cl::desc (" Enable AMDGPU Alias Analysis" ),
251
- cl::init(true ));
240
+ cl::opt<bool >
241
+ EnableAMDGPUAliasAnalysis (" enable-amdgpu-aa" , cl::Hidden,
242
+ cl::desc (" Enable AMDGPU Alias Analysis" ),
243
+ cl::init(true ));
252
244
253
245
// Option to run the late CFG structurizer.
254
- static cl::opt<bool , true > LateCFGStructurize (
255
- " amdgpu-late-structurize" ,
256
- cl::desc (" Enable late CFG structurization" ),
257
- cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
258
- cl::Hidden);
246
+ cl::opt<bool , true > LateCFGStructurize (
247
+ " amdgpu-late-structurize" , cl::desc(" Enable late CFG structurization" ),
248
+ cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden);
259
249
260
250
// Disable structurizer-based control-flow lowering in order to test convergence
261
251
// control tokens. This should eventually be replaced by the wave-transform.
262
- static cl::opt<bool , true > DisableStructurizer (
252
+ cl::opt<bool , true > DisableStructurizer (
263
253
" amdgpu-disable-structurizer" ,
264
254
cl::desc (" Disable structurizer for experiments; produces unusable code" ),
265
255
cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden);
266
256
267
257
// Enable library call simplifications.
268
- static cl::opt<bool > EnableLibCallSimplify (
269
- " amdgpu-simplify-libcall" ,
270
- cl::desc (" Enable amdgpu library simplifications" ),
271
- cl::init(true ),
272
- cl::Hidden);
273
-
274
- static cl::opt<bool > EnableLowerKernelArguments (
275
- " amdgpu-ir-lower-kernel-arguments" ,
276
- cl::desc (" Lower kernel argument loads in IR pass" ),
277
- cl::init(true ),
278
- cl::Hidden);
279
-
280
- static cl::opt<bool > EnableRegReassign (
281
- " amdgpu-reassign-regs" ,
282
- cl::desc (" Enable register reassign optimizations on gfx10+" ),
283
- cl::init(true ),
284
- cl::Hidden);
285
-
286
- static cl::opt<bool > OptVGPRLiveRange (
258
+ cl::opt<bool >
259
+ EnableLibCallSimplify (" amdgpu-simplify-libcall" ,
260
+ cl::desc (" Enable amdgpu library simplifications" ),
261
+ cl::init(true ), cl::Hidden);
262
+
263
+ cl::opt<bool > EnableLowerKernelArguments (
264
+ " amdgpu-ir-lower-kernel-arguments" ,
265
+ cl::desc (" Lower kernel argument loads in IR pass" ), cl::init(true ),
266
+ cl::Hidden);
267
+
268
+ cl::opt<bool > EnableRegReassign (
269
+ " amdgpu-reassign-regs" ,
270
+ cl::desc (" Enable register reassign optimizations on gfx10+" ),
271
+ cl::init(true ), cl::Hidden);
272
+
273
+ cl::opt<bool > OptVGPRLiveRange (
287
274
" amdgpu-opt-vgpr-liverange" ,
288
275
cl::desc (" Enable VGPR liverange optimizations for if-else structure" ),
289
276
cl::init(true ), cl::Hidden);
290
277
291
- static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy (
278
+ cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy (
292
279
" amdgpu-atomic-optimizer-strategy" ,
293
280
cl::desc (" Select DPP or Iterative strategy for scan" ),
294
281
cl::init(ScanOptions::Iterative),
@@ -299,91 +286,85 @@ static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
299
286
clEnumValN(ScanOptions::None, " None" , " Disable atomic optimizer" )));
300
287
301
288
// Enable Mode register optimization
302
- static cl::opt<bool > EnableSIModeRegisterPass (
303
- " amdgpu-mode-register" ,
304
- cl::desc (" Enable mode register pass" ),
305
- cl::init(true ),
306
- cl::Hidden);
289
+ cl::opt<bool > EnableSIModeRegisterPass (" amdgpu-mode-register" ,
290
+ cl::desc (" Enable mode register pass" ),
291
+ cl::init(true ), cl::Hidden);
307
292
308
293
// Enable GFX11.5+ s_singleuse_vdst insertion
309
- static cl::opt<bool >
294
+ cl::opt<bool >
310
295
EnableInsertSingleUseVDST (" amdgpu-enable-single-use-vdst" ,
311
296
cl::desc (" Enable s_singleuse_vdst insertion" ),
312
297
cl::init(false ), cl::Hidden);
313
298
314
299
// Enable GFX11+ s_delay_alu insertion
315
- static cl::opt<bool >
316
- EnableInsertDelayAlu (" amdgpu-enable-delay-alu" ,
317
- cl::desc (" Enable s_delay_alu insertion" ),
318
- cl::init(true ), cl::Hidden);
300
+ cl::opt<bool > EnableInsertDelayAlu (" amdgpu-enable-delay-alu" ,
301
+ cl::desc (" Enable s_delay_alu insertion" ),
302
+ cl::init(true ), cl::Hidden);
319
303
320
304
// Enable GFX11+ VOPD
321
- static cl::opt<bool >
322
- EnableVOPD (" amdgpu-enable-vopd" ,
323
- cl::desc (" Enable VOPD, dual issue of VALU in wave32" ),
324
- cl::init(true ), cl::Hidden);
305
+ cl::opt<bool > EnableVOPD (" amdgpu-enable-vopd" ,
306
+ cl::desc (" Enable VOPD, dual issue of VALU in wave32" ),
307
+ cl::init(true ), cl::Hidden);
325
308
326
309
// Used in lit tests to prevent dead-code elimination of the patterns being inspected.
327
- static cl::opt<bool >
328
- EnableDCEInRA (" amdgpu-dce-in-ra" ,
329
- cl::init (true ), cl::Hidden,
330
- cl::desc(" Enable machine DCE inside regalloc" ));
310
+ cl::opt<bool > EnableDCEInRA (" amdgpu-dce-in-ra" , cl::init(true ), cl::Hidden,
311
+ cl::desc(" Enable machine DCE inside regalloc" ));
331
312
332
- static cl::opt<bool > EnableSetWavePriority (" amdgpu-set-wave-priority" ,
333
- cl::desc (" Adjust wave priority" ),
334
- cl::init(false ), cl::Hidden);
313
+ cl::opt<bool > EnableSetWavePriority (" amdgpu-set-wave-priority" ,
314
+ cl::desc (" Adjust wave priority" ),
315
+ cl::init(false ), cl::Hidden);
335
316
336
- static cl::opt<bool > EnableScalarIRPasses (
337
- " amdgpu-scalar-ir-passes" ,
338
- cl::desc (" Enable scalar IR passes" ),
339
- cl::init(true ),
340
- cl::Hidden);
317
+ cl::opt<bool > EnableScalarIRPasses (" amdgpu-scalar-ir-passes" ,
318
+ cl::desc (" Enable scalar IR passes" ),
319
+ cl::init(true ), cl::Hidden);
341
320
342
- static cl::opt<bool , true > EnableStructurizerWorkarounds (
321
+ cl::opt<bool , true > EnableStructurizerWorkarounds (
343
322
" amdgpu-enable-structurizer-workarounds" ,
344
323
cl::desc (" Enable workarounds for the StructurizeCFG pass" ),
345
324
cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds),
346
325
cl::init(true ), cl::Hidden);
347
326
348
- static cl::opt<bool , true > EnableLowerModuleLDS (
327
+ cl::opt<bool , true > EnableLowerModuleLDS (
349
328
" amdgpu-enable-lower-module-lds" , cl::desc(" Enable lower module lds pass" ),
350
329
cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true ),
351
330
cl::Hidden);
352
331
353
- static cl::opt<bool > EnablePreRAOptimizations (
354
- " amdgpu-enable-pre-ra-optimizations" ,
355
- cl::desc (" Enable Pre-RA optimizations pass" ), cl::init( true ),
356
- cl::Hidden);
332
+ cl::opt<bool >
333
+ EnablePreRAOptimizations ( " amdgpu-enable-pre-ra-optimizations" ,
334
+ cl::desc (" Enable Pre-RA optimizations pass" ),
335
+ cl::init( true ), cl::Hidden);
357
336
358
- static cl::opt<bool > EnablePromoteKernelArguments (
337
+ cl::opt<bool > EnablePromoteKernelArguments (
359
338
" amdgpu-enable-promote-kernel-arguments" ,
360
339
cl::desc (" Enable promotion of flat kernel pointer arguments to global" ),
361
340
cl::Hidden, cl::init(true ));
362
341
363
- static cl::opt<bool > EnableImageIntrinsicOptimizer (
342
+ cl::opt<bool > EnableImageIntrinsicOptimizer (
364
343
" amdgpu-enable-image-intrinsic-optimizer" ,
365
344
cl::desc (" Enable image intrinsic optimizer pass" ), cl::init(true ),
366
345
cl::Hidden);
367
346
368
- static cl::opt<bool >
347
+ cl::opt<bool >
369
348
EnableLoopPrefetch (" amdgpu-loop-prefetch" ,
370
349
cl::desc (" Enable loop data prefetch on AMDGPU" ),
371
350
cl::Hidden, cl::init(false ));
372
351
373
- static cl::opt<bool > EnableMaxIlpSchedStrategy (
352
+ cl::opt<bool > EnableMaxIlpSchedStrategy (
374
353
" amdgpu-enable-max-ilp-scheduling-strategy" ,
375
354
cl::desc (" Enable scheduling strategy to maximize ILP for a single wave." ),
376
355
cl::Hidden, cl::init(false ));
377
356
378
- static cl::opt<bool > EnableRewritePartialRegUses (
357
+ cl::opt<bool > EnableRewritePartialRegUses (
379
358
" amdgpu-enable-rewrite-partial-reg-uses" ,
380
359
cl::desc (" Enable rewrite partial reg uses pass" ), cl::init(true ),
381
360
cl::Hidden);
382
361
383
- static cl::opt<bool > EnableHipStdPar (
384
- " amdgpu-enable-hipstdpar" ,
385
- cl::desc (" Enable HIP Standard Parallelism Offload support" ), cl::init(false ),
386
- cl::Hidden);
362
+ cl::opt<bool >
363
+ EnableHipStdPar (" amdgpu-enable-hipstdpar" ,
364
+ cl::desc (" Enable HIP Standard Parallelism Offload support" ),
365
+ cl::init(false ), cl::Hidden);
366
+
367
+ } // namespace llvm::AMDGPU
387
368
388
369
static cl::opt<bool >
389
370
EnableAMDGPUAttributor (" amdgpu-attributor-enable" ,
0 commit comments