Skip to content

Commit dc7ab09

Browse files
committed
AMDGPU: Declare pass control flags in header
This will allow them to be shared between the old PM and new PM files. I don't really like needing to expose these globally like this; maybe it would be better to just move TargetPassConfig and the CodeGenPassBuilder into one common file?
1 parent 3e142ea commit dc7ab09

File tree

2 files changed

+130
-108
lines changed

2 files changed

+130
-108
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 89 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474

7575
using namespace llvm;
7676
using namespace llvm::PatternMatch;
77+
using namespace llvm::AMDGPU;
7778

7879
namespace {
7980
class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
@@ -186,109 +187,95 @@ static VGPRRegisterRegAlloc fastRegAllocVGPR(
186187
"fast", "fast register allocator", createFastVGPRRegisterAllocator);
187188
} // anonymous namespace
188189

189-
static cl::opt<bool>
190-
EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
191-
cl::desc("Run early if-conversion"),
192-
cl::init(false));
190+
namespace llvm::AMDGPU {
191+
cl::opt<bool> EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
192+
cl::desc("Run early if-conversion"),
193+
cl::init(false));
193194

194-
static cl::opt<bool>
195-
OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
196-
cl::desc("Run pre-RA exec mask optimizations"),
197-
cl::init(true));
195+
cl::opt<bool> OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
196+
cl::desc("Run pre-RA exec mask optimizations"),
197+
cl::init(true));
198198

199-
static cl::opt<bool>
199+
cl::opt<bool>
200200
LowerCtorDtor("amdgpu-lower-global-ctor-dtor",
201201
cl::desc("Lower GPU ctor / dtors to globals on the device."),
202202
cl::init(true), cl::Hidden);
203203

204204
// Option to disable vectorizer for tests.
205-
static cl::opt<bool> EnableLoadStoreVectorizer(
206-
"amdgpu-load-store-vectorizer",
207-
cl::desc("Enable load store vectorizer"),
208-
cl::init(true),
209-
cl::Hidden);
205+
cl::opt<bool>
206+
EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer",
207+
cl::desc("Enable load store vectorizer"),
208+
cl::init(true), cl::Hidden);
210209

211210
// Option to control global loads scalarization
212-
static cl::opt<bool> ScalarizeGlobal(
213-
"amdgpu-scalarize-global-loads",
214-
cl::desc("Enable global load scalarization"),
215-
cl::init(true),
216-
cl::Hidden);
211+
cl::opt<bool> ScalarizeGlobal("amdgpu-scalarize-global-loads",
212+
cl::desc("Enable global load scalarization"),
213+
cl::init(true), cl::Hidden);
217214

218215
// Option to run internalize pass.
219-
static cl::opt<bool> InternalizeSymbols(
220-
"amdgpu-internalize-symbols",
221-
cl::desc("Enable elimination of non-kernel functions and unused globals"),
222-
cl::init(false),
223-
cl::Hidden);
216+
cl::opt<bool> InternalizeSymbols(
217+
"amdgpu-internalize-symbols",
218+
cl::desc("Enable elimination of non-kernel functions and unused globals"),
219+
cl::init(false), cl::Hidden);
224220

225221
// Option to inline all early.
226-
static cl::opt<bool> EarlyInlineAll(
227-
"amdgpu-early-inline-all",
228-
cl::desc("Inline all functions early"),
229-
cl::init(false),
230-
cl::Hidden);
222+
cl::opt<bool> EarlyInlineAll("amdgpu-early-inline-all",
223+
cl::desc("Inline all functions early"),
224+
cl::init(false), cl::Hidden);
231225

232-
static cl::opt<bool> RemoveIncompatibleFunctions(
226+
cl::opt<bool> RemoveIncompatibleFunctions(
233227
"amdgpu-enable-remove-incompatible-functions", cl::Hidden,
234228
cl::desc("Enable removal of functions when they"
235229
"use features not supported by the target GPU"),
236230
cl::init(true));
237231

238-
static cl::opt<bool> EnableSDWAPeephole(
239-
"amdgpu-sdwa-peephole",
240-
cl::desc("Enable SDWA peepholer"),
241-
cl::init(true));
232+
cl::opt<bool> EnableSDWAPeephole("amdgpu-sdwa-peephole",
233+
cl::desc("Enable SDWA peepholer"),
234+
cl::init(true));
242235

243-
static cl::opt<bool> EnableDPPCombine(
244-
"amdgpu-dpp-combine",
245-
cl::desc("Enable DPP combiner"),
246-
cl::init(true));
236+
cl::opt<bool> EnableDPPCombine("amdgpu-dpp-combine",
237+
cl::desc("Enable DPP combiner"), cl::init(true));
247238

248239
// Enable address space based alias analysis
249-
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
250-
cl::desc("Enable AMDGPU Alias Analysis"),
251-
cl::init(true));
240+
cl::opt<bool>
241+
EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
242+
cl::desc("Enable AMDGPU Alias Analysis"),
243+
cl::init(true));
252244

253245
// Option to run late CFG structurizer
254-
static cl::opt<bool, true> LateCFGStructurize(
255-
"amdgpu-late-structurize",
256-
cl::desc("Enable late CFG structurization"),
257-
cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
258-
cl::Hidden);
246+
cl::opt<bool, true> LateCFGStructurize(
247+
"amdgpu-late-structurize", cl::desc("Enable late CFG structurization"),
248+
cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden);
259249

260250
// Disable structurizer-based control-flow lowering in order to test convergence
261251
// control tokens. This should eventually be replaced by the wave-transform.
262-
static cl::opt<bool, true> DisableStructurizer(
252+
cl::opt<bool, true> DisableStructurizer(
263253
"amdgpu-disable-structurizer",
264254
cl::desc("Disable structurizer for experiments; produces unusable code"),
265255
cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden);
266256

267257
// Enable lib calls simplifications
268-
static cl::opt<bool> EnableLibCallSimplify(
269-
"amdgpu-simplify-libcall",
270-
cl::desc("Enable amdgpu library simplifications"),
271-
cl::init(true),
272-
cl::Hidden);
273-
274-
static cl::opt<bool> EnableLowerKernelArguments(
275-
"amdgpu-ir-lower-kernel-arguments",
276-
cl::desc("Lower kernel argument loads in IR pass"),
277-
cl::init(true),
278-
cl::Hidden);
279-
280-
static cl::opt<bool> EnableRegReassign(
281-
"amdgpu-reassign-regs",
282-
cl::desc("Enable register reassign optimizations on gfx10+"),
283-
cl::init(true),
284-
cl::Hidden);
285-
286-
static cl::opt<bool> OptVGPRLiveRange(
258+
cl::opt<bool>
259+
EnableLibCallSimplify("amdgpu-simplify-libcall",
260+
cl::desc("Enable amdgpu library simplifications"),
261+
cl::init(true), cl::Hidden);
262+
263+
cl::opt<bool> EnableLowerKernelArguments(
264+
"amdgpu-ir-lower-kernel-arguments",
265+
cl::desc("Lower kernel argument loads in IR pass"), cl::init(true),
266+
cl::Hidden);
267+
268+
cl::opt<bool> EnableRegReassign(
269+
"amdgpu-reassign-regs",
270+
cl::desc("Enable register reassign optimizations on gfx10+"),
271+
cl::init(true), cl::Hidden);
272+
273+
cl::opt<bool> OptVGPRLiveRange(
287274
"amdgpu-opt-vgpr-liverange",
288275
cl::desc("Enable VGPR liverange optimizations for if-else structure"),
289276
cl::init(true), cl::Hidden);
290277

291-
static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
278+
cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
292279
"amdgpu-atomic-optimizer-strategy",
293280
cl::desc("Select DPP or Iterative strategy for scan"),
294281
cl::init(ScanOptions::Iterative),
@@ -299,91 +286,85 @@ static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
299286
clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")));
300287

301288
// Enable Mode register optimization
302-
static cl::opt<bool> EnableSIModeRegisterPass(
303-
"amdgpu-mode-register",
304-
cl::desc("Enable mode register pass"),
305-
cl::init(true),
306-
cl::Hidden);
289+
cl::opt<bool> EnableSIModeRegisterPass("amdgpu-mode-register",
290+
cl::desc("Enable mode register pass"),
291+
cl::init(true), cl::Hidden);
307292

308293
// Enable GFX11.5+ s_singleuse_vdst insertion
309-
static cl::opt<bool>
294+
cl::opt<bool>
310295
EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst",
311296
cl::desc("Enable s_singleuse_vdst insertion"),
312297
cl::init(false), cl::Hidden);
313298

314299
// Enable GFX11+ s_delay_alu insertion
315-
static cl::opt<bool>
316-
EnableInsertDelayAlu("amdgpu-enable-delay-alu",
317-
cl::desc("Enable s_delay_alu insertion"),
318-
cl::init(true), cl::Hidden);
300+
cl::opt<bool> EnableInsertDelayAlu("amdgpu-enable-delay-alu",
301+
cl::desc("Enable s_delay_alu insertion"),
302+
cl::init(true), cl::Hidden);
319303

320304
// Enable GFX11+ VOPD
321-
static cl::opt<bool>
322-
EnableVOPD("amdgpu-enable-vopd",
323-
cl::desc("Enable VOPD, dual issue of VALU in wave32"),
324-
cl::init(true), cl::Hidden);
305+
cl::opt<bool> EnableVOPD("amdgpu-enable-vopd",
306+
cl::desc("Enable VOPD, dual issue of VALU in wave32"),
307+
cl::init(true), cl::Hidden);
325308

326309
// Option is used in lit tests to prevent deadcoding of patterns inspected.
327-
static cl::opt<bool>
328-
EnableDCEInRA("amdgpu-dce-in-ra",
329-
cl::init(true), cl::Hidden,
330-
cl::desc("Enable machine DCE inside regalloc"));
310+
cl::opt<bool> EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden,
311+
cl::desc("Enable machine DCE inside regalloc"));
331312

332-
static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
333-
cl::desc("Adjust wave priority"),
334-
cl::init(false), cl::Hidden);
313+
cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
314+
cl::desc("Adjust wave priority"),
315+
cl::init(false), cl::Hidden);
335316

336-
static cl::opt<bool> EnableScalarIRPasses(
337-
"amdgpu-scalar-ir-passes",
338-
cl::desc("Enable scalar IR passes"),
339-
cl::init(true),
340-
cl::Hidden);
317+
cl::opt<bool> EnableScalarIRPasses("amdgpu-scalar-ir-passes",
318+
cl::desc("Enable scalar IR passes"),
319+
cl::init(true), cl::Hidden);
341320

342-
static cl::opt<bool, true> EnableStructurizerWorkarounds(
321+
cl::opt<bool, true> EnableStructurizerWorkarounds(
343322
"amdgpu-enable-structurizer-workarounds",
344323
cl::desc("Enable workarounds for the StructurizeCFG pass"),
345324
cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds),
346325
cl::init(true), cl::Hidden);
347326

348-
static cl::opt<bool, true> EnableLowerModuleLDS(
327+
cl::opt<bool, true> EnableLowerModuleLDS(
349328
"amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
350329
cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
351330
cl::Hidden);
352331

353-
static cl::opt<bool> EnablePreRAOptimizations(
354-
"amdgpu-enable-pre-ra-optimizations",
355-
cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
356-
cl::Hidden);
332+
cl::opt<bool>
333+
EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations",
334+
cl::desc("Enable Pre-RA optimizations pass"),
335+
cl::init(true), cl::Hidden);
357336

358-
static cl::opt<bool> EnablePromoteKernelArguments(
337+
cl::opt<bool> EnablePromoteKernelArguments(
359338
"amdgpu-enable-promote-kernel-arguments",
360339
cl::desc("Enable promotion of flat kernel pointer arguments to global"),
361340
cl::Hidden, cl::init(true));
362341

363-
static cl::opt<bool> EnableImageIntrinsicOptimizer(
342+
cl::opt<bool> EnableImageIntrinsicOptimizer(
364343
"amdgpu-enable-image-intrinsic-optimizer",
365344
cl::desc("Enable image intrinsic optimizer pass"), cl::init(true),
366345
cl::Hidden);
367346

368-
static cl::opt<bool>
347+
cl::opt<bool>
369348
EnableLoopPrefetch("amdgpu-loop-prefetch",
370349
cl::desc("Enable loop data prefetch on AMDGPU"),
371350
cl::Hidden, cl::init(false));
372351

373-
static cl::opt<bool> EnableMaxIlpSchedStrategy(
352+
cl::opt<bool> EnableMaxIlpSchedStrategy(
374353
"amdgpu-enable-max-ilp-scheduling-strategy",
375354
cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),
376355
cl::Hidden, cl::init(false));
377356

378-
static cl::opt<bool> EnableRewritePartialRegUses(
357+
cl::opt<bool> EnableRewritePartialRegUses(
379358
"amdgpu-enable-rewrite-partial-reg-uses",
380359
cl::desc("Enable rewrite partial reg uses pass"), cl::init(true),
381360
cl::Hidden);
382361

383-
static cl::opt<bool> EnableHipStdPar(
384-
"amdgpu-enable-hipstdpar",
385-
cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
386-
cl::Hidden);
362+
cl::opt<bool>
363+
EnableHipStdPar("amdgpu-enable-hipstdpar",
364+
cl::desc("Enable HIP Standard Parallelism Offload support"),
365+
cl::init(false), cl::Hidden);
366+
367+
} // namespace llvm::AMDGPU
387368

388369
static cl::opt<bool>
389370
EnableAMDGPUAttributor("amdgpu-attributor-enable",

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,53 @@
1616

1717
#include "GCNSubtarget.h"
1818
#include "llvm/CodeGen/TargetPassConfig.h"
19+
#include "llvm/Support/CommandLine.h"
1920
#include "llvm/Target/TargetMachine.h"
2021
#include <optional>
2122
#include <utility>
2223

2324
namespace llvm {
2425

26+
enum class ScanOptions;
27+
28+
namespace AMDGPU {
29+
30+
extern cl::opt<bool> EnableEarlyIfConversion;
31+
extern cl::opt<bool> OptExecMaskPreRA;
32+
extern cl::opt<bool> LowerCtorDtor;
33+
extern cl::opt<bool> EnableLoadStoreVectorizer;
34+
extern cl::opt<bool> ScalarizeGlobal;
35+
extern cl::opt<bool> InternalizeSymbols;
36+
extern cl::opt<bool> EarlyInlineAll;
37+
extern cl::opt<bool> RemoveIncompatibleFunctions;
38+
extern cl::opt<bool> EnableSDWAPeephole;
39+
extern cl::opt<bool> EnableDPPCombine;
40+
extern cl::opt<bool> EnableAMDGPUAliasAnalysis;
41+
extern cl::opt<bool, true> LateCFGStructurize;
42+
extern cl::opt<bool, true> DisableStructurizer;
43+
extern cl::opt<bool> EnableLibCallSimplify;
44+
extern cl::opt<bool> EnableLowerKernelArguments;
45+
extern cl::opt<bool> EnableRegReassign;
46+
extern cl::opt<bool> OptVGPRLiveRange;
47+
extern cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy;
48+
extern cl::opt<bool> EnableSIModeRegisterPass;
49+
extern cl::opt<bool> EnableInsertSingleUseVDST;
50+
extern cl::opt<bool> EnableInsertDelayAlu;
51+
extern cl::opt<bool> EnableVOPD;
52+
extern cl::opt<bool> EnableDCEInRA;
53+
extern cl::opt<bool> EnableSetWavePriority;
54+
extern cl::opt<bool> EnableScalarIRPasses;
55+
extern cl::opt<bool, true> EnableStructurizerWorkarounds;
56+
extern cl::opt<bool, true> EnableLowerModuleLDS;
57+
extern cl::opt<bool> EnablePreRAOptimizations;
58+
extern cl::opt<bool> EnablePromoteKernelArguments;
59+
extern cl::opt<bool> EnableImageIntrinsicOptimizer;
60+
extern cl::opt<bool> EnableLoopPrefetch;
61+
extern cl::opt<bool> EnableMaxIlpSchedStrategy;
62+
extern cl::opt<bool> EnableRewritePartialRegUses;
63+
extern cl::opt<bool> EnableHipStdPar;
64+
} // namespace AMDGPU
65+
2566
//===----------------------------------------------------------------------===//
2667
// AMDGPU Target Machine (R600+)
2768
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)