@@ -33,103 +33,9 @@ using namespace llvm;
33
33
using namespace llvm ::AMDGPU;
34
34
using namespace llvm ::PatternMatch;
35
35
36
- namespace {
37
- class AMDGPUUniformIntrinsicCombineLegacy : public FunctionPass {
38
- public:
39
- static char ID;
40
- AMDGPUUniformIntrinsicCombineLegacy () : FunctionPass(ID) {
41
- initializeAMDGPUUniformIntrinsicCombineLegacyPass (
42
- *PassRegistry::getPassRegistry ());
43
- }
44
- bool runOnFunction (Function &F) override ;
45
- void getAnalysisUsage (AnalysisUsage &AU) const override {
46
- AU.setPreservesCFG ();
47
- AU.addRequired <UniformityInfoWrapperPass>();
48
- AU.addRequired <TargetPassConfig>();
49
- }
50
- };
51
-
52
- class AMDGPUUniformIntrinsicCombineImpl
53
- : public InstVisitor<AMDGPUUniformIntrinsicCombineImpl> {
54
- private:
55
- const UniformityInfo *UI;
56
- bool optimizeUniformIntrinsicInst (IntrinsicInst &II) const ;
57
-
58
- public:
59
- AMDGPUUniformIntrinsicCombineImpl () = delete ;
60
- AMDGPUUniformIntrinsicCombineImpl (const UniformityInfo *UI) : UI(UI) {}
61
- bool run (Function &F);
62
- };
63
- } // namespace
64
-
65
- char AMDGPUUniformIntrinsicCombineLegacy::ID = 0 ;
66
- char &llvm::AMDGPUUniformIntrinsicCombineLegacyPassID =
67
- AMDGPUUniformIntrinsicCombineLegacy::ID;
68
-
69
- bool AMDGPUUniformIntrinsicCombineLegacy::runOnFunction (Function &F) {
70
- if (skipFunction (F)) {
71
- return false ;
72
- }
73
- const UniformityInfo *UI =
74
- &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo ();
75
- return AMDGPUUniformIntrinsicCombineImpl (UI).run (F);
76
- }
77
-
78
- PreservedAnalyses
79
- AMDGPUUniformIntrinsicCombinePass::run (Function &F,
80
- FunctionAnalysisManager &AM) {
81
- const auto *UI = &AM.getResult <UniformityInfoAnalysis>(F);
82
- bool IsChanged = AMDGPUUniformIntrinsicCombineImpl (UI).run (F);
83
-
84
- if (!IsChanged) {
85
- return PreservedAnalyses::all ();
86
- }
87
- PreservedAnalyses PA;
88
- PA.preserve <DominatorTreeAnalysis>();
89
- PA.preserve <LoopAnalysis>();
90
- PA.preserve <ScalarEvolutionAnalysis>();
91
- PA.preserve <UniformityInfoAnalysis>();
92
- PA.preserve <TargetLibraryAnalysis>();
93
- return PA;
94
- }
95
-
96
- bool AMDGPUUniformIntrinsicCombineImpl::run (Function &F) {
97
- Module *M = F.getParent ();
98
- llvm::LLVMContext &Ctx = M->getContext ();
99
- // List of AMDGPU intrinsics to optimize if their arguments are uniform.
100
- std::vector<Intrinsic::ID> Intrinsics = {
101
- Intrinsic::amdgcn_permlane64, Intrinsic::amdgcn_readfirstlane,
102
- Intrinsic::amdgcn_readlane, Intrinsic::amdgcn_ballot};
103
-
104
- bool IsChanged = false ;
105
-
106
- // Iterate over each intrinsic in the list and process its uses within F.
107
- for (Intrinsic::ID IID : Intrinsics) {
108
- // Determine the correct return type for the intrinsic.
109
- // Most intrinsics return i32, but amdgcn_ballot returns i64.
110
- llvm::Type *IntrinsicTy = (IID == Intrinsic::amdgcn_ballot)
111
- ? llvm::Type::getInt64Ty (Ctx)
112
- : llvm::Type::getInt32Ty (Ctx);
113
-
114
- // Check if the intrinsic is declared in the module with the expected type.
115
- if (Function *Intr =
116
- Intrinsic::getDeclarationIfExists (M, IID, {IntrinsicTy})) {
117
- // Iterate over all users of the intrinsic.
118
- for (User *U : Intr->users ()) {
119
- // Ensure the user is an intrinsic call within function F.
120
- if (auto *II = dyn_cast<IntrinsicInst>(U)) {
121
- if (II->getFunction () == &F) {
122
- IsChanged |= optimizeUniformIntrinsicInst (*II);
123
- }
124
- }
125
- }
126
- }
127
- }
128
- return IsChanged;
129
- }
130
-
131
- bool AMDGPUUniformIntrinsicCombineImpl::optimizeUniformIntrinsicInst (
132
- IntrinsicInst &II) const {
36
+ // / Optimizes uniform intrinsics.
37
+ static bool optimizeUniformIntrinsic (IntrinsicInst &II,
38
+ const UniformityInfo *UI) {
133
39
llvm::Intrinsic::ID IID = II.getIntrinsicID ();
134
40
135
41
switch (IID) {
@@ -194,6 +100,84 @@ bool AMDGPUUniformIntrinsicCombineImpl::optimizeUniformIntrinsicInst(
194
100
return false ;
195
101
}
196
102
103
+ // / Iterates over the Intrinsics use in the function to optimise.
104
+ static bool runUniformIntrinsicCombine (Function &F, const UniformityInfo *UI) {
105
+ Module *M = F.getParent ();
106
+ llvm::LLVMContext &Ctx = M->getContext ();
107
+ // List of AMDGPU intrinsics to optimize if their arguments are uniform.
108
+ std::vector<Intrinsic::ID> Intrinsics = {
109
+ Intrinsic::amdgcn_permlane64, Intrinsic::amdgcn_readfirstlane,
110
+ Intrinsic::amdgcn_readlane, Intrinsic::amdgcn_ballot};
111
+
112
+ bool IsChanged = false ;
113
+
114
+ // Iterate over each intrinsic in the list and process its uses within F.
115
+ for (Intrinsic::ID IID : Intrinsics) {
116
+ // Determine the correct return type for the intrinsic.
117
+ // Most intrinsics return i32, but amdgcn_ballot returns i64.
118
+ llvm::Type *IntrinsicTy = (IID == Intrinsic::amdgcn_ballot)
119
+ ? llvm::Type::getInt64Ty (Ctx)
120
+ : llvm::Type::getInt32Ty (Ctx);
121
+
122
+ // Check if the intrinsic is declared in the module with the expected type.
123
+ if (Function *Intr =
124
+ Intrinsic::getDeclarationIfExists (M, IID, {IntrinsicTy})) {
125
+ // Iterate over all users of the intrinsic.
126
+ for (User *U : Intr->users ()) {
127
+ // Ensure the user is an intrinsic call within function F.
128
+ if (auto *II = dyn_cast<IntrinsicInst>(U)) {
129
+ if (II->getFunction () == &F) {
130
+ IsChanged |= optimizeUniformIntrinsic (*II, UI);
131
+ }
132
+ }
133
+ }
134
+ }
135
+ }
136
+ return IsChanged;
137
+ }
138
+ class AMDGPUUniformIntrinsicCombineLegacy : public FunctionPass {
139
+ public:
140
+ static char ID;
141
+ AMDGPUUniformIntrinsicCombineLegacy () : FunctionPass(ID) {
142
+ initializeAMDGPUUniformIntrinsicCombineLegacyPass (
143
+ *PassRegistry::getPassRegistry ());
144
+ }
145
+ bool runOnFunction (Function &F) override ;
146
+ void getAnalysisUsage (AnalysisUsage &AU) const override {
147
+ AU.setPreservesCFG ();
148
+ AU.addRequired <UniformityInfoWrapperPass>();
149
+ AU.addRequired <TargetPassConfig>();
150
+ AU.addPreserved <UniformityInfoWrapperPass>();
151
+ }
152
+ };
153
+
154
// Pass identity for the legacy pass manager; the *address* of ID is what
// uniquely identifies the pass.
char AMDGPUUniformIntrinsicCombineLegacy::ID = 0;
// Externally visible handle (declared in an AMDGPU header) so target setup
// code can reference this pass without seeing the class definition.
char &llvm::AMDGPUUniformIntrinsicCombineLegacyPassID =
    AMDGPUUniformIntrinsicCombineLegacy::ID;
157
+
158
+ bool AMDGPUUniformIntrinsicCombineLegacy::runOnFunction (Function &F) {
159
+ if (skipFunction (F)) {
160
+ return false ;
161
+ }
162
+ const UniformityInfo *UI =
163
+ &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo ();
164
+ return runUniformIntrinsicCombine (F, UI);
165
+ }
166
+
167
+ PreservedAnalyses
168
+ AMDGPUUniformIntrinsicCombinePass::run (Function &F,
169
+ FunctionAnalysisManager &AM) {
170
+ const auto *UI = &AM.getResult <UniformityInfoAnalysis>(F);
171
+ bool IsChanged = runUniformIntrinsicCombine (F, UI);
172
+
173
+ if (!IsChanged) {
174
+ return PreservedAnalyses::all ();
175
+ }
176
+ PreservedAnalyses PA;
177
+ PA.preserve <UniformityInfoAnalysis>();
178
+ return PA;
179
+ }
180
+
197
181
INITIALIZE_PASS_BEGIN (AMDGPUUniformIntrinsicCombineLegacy, DEBUG_TYPE,
198
182
" AMDGPU uniformIntrinsic Combine" , false , false )
199
183
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
0 commit comments