Skip to content

Commit 5ea4ecb

Browse files
[WIP][AMDGPU] combine uniform AMDGPUIntrinsics
1 parent b4339dd commit 5ea4ecb

File tree

5 files changed

+459
-39
lines changed

5 files changed

+459
-39
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ class SILowerI1CopiesPass : public PassInfoMixin<SILowerI1CopiesPass> {
9292

9393
void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &);
9494

95-
void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
95+
void initializeAMDGPUAlwaysInlinePass(PassRegistry &);
9696

9797
Pass *createAMDGPUAnnotateKernelFeaturesPass();
9898
Pass *createAMDGPUAttributorLegacyPass();
@@ -226,11 +226,11 @@ extern char &GCNRegPressurePrinterID;
226226

227227
// Passes common to R600 and SI
228228
FunctionPass *createAMDGPUPromoteAlloca();
229-
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
229+
void initializeAMDGPUPromoteAllocaPass(PassRegistry &);
230230
extern char &AMDGPUPromoteAllocaID;
231231

232232
FunctionPass *createAMDGPUPromoteAllocaToVector();
233-
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
233+
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &);
234234
extern char &AMDGPUPromoteAllocaToVectorID;
235235

236236
struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
@@ -299,7 +299,7 @@ class AMDGPULateCodeGenPreparePass
299299
const GCNTargetMachine &TM;
300300

301301
public:
302-
AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM) {};
302+
AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM){};
303303
PreservedAnalyses run(Function &, FunctionAnalysisManager &);
304304
};
305305

@@ -325,7 +325,7 @@ class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
325325

326326
public:
327327
AMDGPUAttributorPass(TargetMachine &TM, AMDGPUAttributorOptions Options = {})
328-
: TM(TM), Options(Options) {};
328+
: TM(TM), Options(Options){};
329329
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
330330
};
331331

@@ -339,7 +339,7 @@ class AMDGPUAnnotateUniformValuesPass
339339
FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
340340

341341
ModulePass *createAMDGPUPrintfRuntimeBinding();
342-
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
342+
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &);
343343
extern char &AMDGPUPrintfRuntimeBindingID;
344344

345345
void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &);
@@ -350,15 +350,15 @@ struct AMDGPUPrintfRuntimeBindingPass
350350
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
351351
};
352352

353-
ModulePass* createAMDGPUUnifyMetadataPass();
354-
void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
353+
ModulePass *createAMDGPUUnifyMetadataPass();
354+
void initializeAMDGPUUnifyMetadataPass(PassRegistry &);
355355
extern char &AMDGPUUnifyMetadataID;
356356

357357
struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
358358
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
359359
};
360360

361-
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
361+
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &);
362362
extern char &SIOptimizeExecMaskingPreRAID;
363363

364364
void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
@@ -367,7 +367,7 @@ extern char &SIOptimizeVGPRLiveRangeID;
367367
void initializeAMDGPUAnnotateUniformValuesLegacyPass(PassRegistry &);
368368
extern char &AMDGPUAnnotateUniformValuesLegacyPassID;
369369

370-
void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
370+
void initializeAMDGPUCodeGenPreparePass(PassRegistry &);
371371
extern char &AMDGPUCodeGenPrepareID;
372372

373373
void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
@@ -400,10 +400,10 @@ class SIAnnotateControlFlowPass
400400
void initializeSIAnnotateControlFlowLegacyPass(PassRegistry &);
401401
extern char &SIAnnotateControlFlowLegacyPassID;
402402

403-
void initializeSIMemoryLegalizerPass(PassRegistry&);
403+
void initializeSIMemoryLegalizerPass(PassRegistry &);
404404
extern char &SIMemoryLegalizerID;
405405

406-
void initializeSIModeRegisterPass(PassRegistry&);
406+
void initializeSIModeRegisterPass(PassRegistry &);
407407
extern char &SIModeRegisterID;
408408

409409
void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
@@ -412,25 +412,25 @@ extern char &AMDGPUInsertDelayAluID;
412412
void initializeSIInsertHardClausesPass(PassRegistry &);
413413
extern char &SIInsertHardClausesID;
414414

415-
void initializeSIInsertWaitcntsPass(PassRegistry&);
415+
void initializeSIInsertWaitcntsPass(PassRegistry &);
416416
extern char &SIInsertWaitcntsID;
417417

418-
void initializeSIFormMemoryClausesPass(PassRegistry&);
418+
void initializeSIFormMemoryClausesPass(PassRegistry &);
419419
extern char &SIFormMemoryClausesID;
420420

421-
void initializeSIPostRABundlerPass(PassRegistry&);
421+
void initializeSIPostRABundlerPass(PassRegistry &);
422422
extern char &SIPostRABundlerID;
423423

424424
void initializeGCNCreateVOPDPass(PassRegistry &);
425425
extern char &GCNCreateVOPDID;
426426

427-
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
427+
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &);
428428
extern char &AMDGPUUnifyDivergentExitNodesID;
429429

430430
ImmutablePass *createAMDGPUAAWrapperPass();
431-
void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
431+
void initializeAMDGPUAAWrapperPassPass(PassRegistry &);
432432
ImmutablePass *createAMDGPUExternalAAWrapperPass();
433-
void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
433+
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &);
434434

435435
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
436436

@@ -453,6 +453,17 @@ void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
453453
void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
454454
extern char &GCNRewritePartialRegUsesID;
455455

456+
void initializeAMDGPUUniformIntrinsicCombinePass(PassRegistry &);
457+
extern char &AMDGPUUniformIntrinsicCombineID;
458+
FunctionPass *createAMDGPUUniformIntrinsicCombinePass();
459+
460+
struct AMDGPUUniformIntrinsicCombinePass
461+
: public PassInfoMixin<AMDGPUUniformIntrinsicCombinePass> {
462+
const AMDGPUTargetMachine &TM;
463+
AMDGPUUniformIntrinsicCombinePass(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
464+
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
465+
};
466+
456467
namespace AMDGPU {
457468
enum TargetIndex {
458469
TI_CONSTDATA_START,
@@ -488,7 +499,7 @@ static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
488499
return ASAliasRules[AS1][AS2];
489500
}
490501

491-
}
502+
} // namespace AMDGPU
492503

493504
} // End namespace llvm
494505

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,21 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers",
2222
MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass())
2323
MODULE_PASS("amdgpu-sw-lower-lds", AMDGPUSwLowerLDSPass(*this))
2424
MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this))
25-
MODULE_PASS("amdgpu-perf-hint",
26-
AMDGPUPerfHintAnalysisPass(
27-
*static_cast<const GCNTargetMachine *>(this)))
25+
MODULE_PASS(
26+
"amdgpu-perf-hint",
27+
AMDGPUPerfHintAnalysisPass(*static_cast<const GCNTargetMachine *>(this)))
2828
MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
2929
MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass())
3030
#undef MODULE_PASS
3131

3232
#ifndef MODULE_PASS_WITH_PARAMS
3333
#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
3434
#endif
35-
MODULE_PASS_WITH_PARAMS(
36-
"amdgpu-attributor", "AMDGPUAttributorPass",
37-
[=](AMDGPUAttributorOptions Options) {
38-
return AMDGPUAttributorPass(*this, Options);
39-
},
40-
parseAMDGPUAttributorPassOptions, "closed-world")
35+
MODULE_PASS_WITH_PARAMS("amdgpu-attributor", "AMDGPUAttributorPass",
36+
[=](AMDGPUAttributorOptions Options) {
37+
return AMDGPUAttributorPass(*this, Options);
38+
},
39+
parseAMDGPUAttributorPassOptions, "closed-world")
4140
#undef MODULE_PASS_WITH_PARAMS
4241

4342
#ifndef FUNCTION_PASS
@@ -47,9 +46,9 @@ FUNCTION_PASS("amdgpu-annotate-uniform", AMDGPUAnnotateUniformValuesPass())
4746
FUNCTION_PASS("amdgpu-codegenprepare", AMDGPUCodeGenPreparePass(*this))
4847
FUNCTION_PASS("amdgpu-image-intrinsic-opt",
4948
AMDGPUImageIntrinsicOptimizerPass(*this))
50-
FUNCTION_PASS("amdgpu-late-codegenprepare",
51-
AMDGPULateCodeGenPreparePass(
52-
*static_cast<const GCNTargetMachine *>(this)))
49+
FUNCTION_PASS(
50+
"amdgpu-late-codegenprepare",
51+
AMDGPULateCodeGenPreparePass(*static_cast<const GCNTargetMachine *>(this)))
5352
FUNCTION_PASS("amdgpu-lower-kernel-arguments",
5453
AMDGPULowerKernelArgumentsPass(*this))
5554
FUNCTION_PASS("amdgpu-lower-kernel-attributes",
@@ -64,7 +63,11 @@ FUNCTION_PASS("amdgpu-rewrite-undef-for-phi", AMDGPURewriteUndefForPHIPass())
6463
FUNCTION_PASS("amdgpu-unify-divergent-exit-nodes",
6564
AMDGPUUnifyDivergentExitNodesPass())
6665
FUNCTION_PASS("amdgpu-usenative", AMDGPUUseNativeCallsPass())
67-
FUNCTION_PASS("si-annotate-control-flow", SIAnnotateControlFlowPass(*static_cast<const GCNTargetMachine *>(this)))
66+
FUNCTION_PASS(
67+
"si-annotate-control-flow",
68+
SIAnnotateControlFlowPass(*static_cast<const GCNTargetMachine *>(this)))
69+
FUNCTION_PASS("amdgpu-uniformIntrinsic-combine",
70+
AMDGPUUniformIntrinsicCombinePass(*this))
6871
#undef FUNCTION_PASS
6972

7073
#ifndef FUNCTION_ANALYSIS
@@ -82,13 +85,13 @@ FUNCTION_ALIAS_ANALYSIS("amdgpu-aa", AMDGPUAA())
8285
#ifndef FUNCTION_PASS_WITH_PARAMS
8386
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
8487
#endif
85-
FUNCTION_PASS_WITH_PARAMS(
86-
"amdgpu-atomic-optimizer",
87-
"AMDGPUAtomicOptimizerPass",
88-
[=](ScanOptions Strategy) {
89-
return AMDGPUAtomicOptimizerPass(*this, Strategy);
90-
},
91-
parseAMDGPUAtomicOptimizerStrategy, "strategy=dpp|iterative|none")
88+
FUNCTION_PASS_WITH_PARAMS("amdgpu-atomic-optimizer",
89+
"AMDGPUAtomicOptimizerPass",
90+
[=](ScanOptions Strategy) {
91+
return AMDGPUAtomicOptimizerPass(*this, Strategy);
92+
},
93+
parseAMDGPUAtomicOptimizerStrategy,
94+
"strategy=dpp|iterative|none")
9295
#undef FUNCTION_PASS_WITH_PARAMS
9396

9497
#ifndef MACHINE_FUNCTION_PASS

0 commit comments

Comments
 (0)