11
11
// are propagated to the callee by specializing the function.
12
12
//
13
13
// Current limitations:
14
- // - It does not handle specialization of recursive functions,
15
14
// - It does not yet handle integer ranges.
16
15
// - Only 1 argument per function is specialised,
17
16
// - The cost-model could be further looked into,
@@ -68,9 +67,142 @@ static cl::opt<bool> EnableSpecializationForLiteralConstant(
68
67
" function-specialization-for-literal-constant" , cl::init(false ), cl::Hidden,
69
68
cl::desc(" Make function specialization available for literal constant." ));
70
69
70
+ // Helper to check if \p LV is either a constant or a constant
71
+ // range with a single element. This should cover exactly the same cases as the
72
+ // old ValueLatticeElement::isConstant() and is intended to be used in the
73
+ // transition to ValueLatticeElement.
74
+ static bool isConstant (const ValueLatticeElement &LV) {
75
+ return LV.isConstant () ||
76
+ (LV.isConstantRange () && LV.getConstantRange ().isSingleElement ());
77
+ }
78
+
71
79
// Helper to check if \p LV is either overdefined or a constant int.
72
80
static bool isOverdefined (const ValueLatticeElement &LV) {
73
- return !LV.isUnknownOrUndef () && !LV.isConstant ();
81
+ return !LV.isUnknownOrUndef () && !isConstant (LV);
82
+ }
83
+
84
+ static Constant *getPromotableAlloca (AllocaInst *Alloca, CallInst *Call) {
85
+ Value *StoreValue = nullptr ;
86
+ for (auto *User : Alloca->users ()) {
87
+ // We can't use llvm::isAllocaPromotable() as that would fail because of
88
+ // the usage in the CallInst, which is what we check here.
89
+ if (User == Call)
90
+ continue ;
91
+ if (auto *Bitcast = dyn_cast<BitCastInst>(User)) {
92
+ if (!Bitcast->hasOneUse () || *Bitcast->user_begin () != Call)
93
+ return nullptr ;
94
+ continue ;
95
+ }
96
+
97
+ if (auto *Store = dyn_cast<StoreInst>(User)) {
98
+ // This is a duplicate store, bail out.
99
+ if (StoreValue || Store->isVolatile ())
100
+ return nullptr ;
101
+ StoreValue = Store->getValueOperand ();
102
+ continue ;
103
+ }
104
+ // Bail if there is any other unknown usage.
105
+ return nullptr ;
106
+ }
107
+ return dyn_cast_or_null<Constant>(StoreValue);
108
+ }
109
+
110
+ // A constant stack value is an AllocaInst that has a single constant
111
+ // value stored to it. Return this constant if such an alloca stack value
112
+ // is a function argument.
113
+ static Constant *getConstantStackValue (CallInst *Call, Value *Val,
114
+ SCCPSolver &Solver) {
115
+ if (!Val)
116
+ return nullptr ;
117
+ Val = Val->stripPointerCasts ();
118
+ if (auto *ConstVal = dyn_cast<ConstantInt>(Val))
119
+ return ConstVal;
120
+ auto *Alloca = dyn_cast<AllocaInst>(Val);
121
+ if (!Alloca || !Alloca->getAllocatedType ()->isIntegerTy ())
122
+ return nullptr ;
123
+ return getPromotableAlloca (Alloca, Call);
124
+ }
125
+
126
+ // To support specializing recursive functions, it is important to propagate
127
+ // constant arguments because after a first iteration of specialisation, a
128
+ // reduced example may look like this:
129
+ //
130
+ // define internal void @RecursiveFn(i32* arg1) {
131
+ // %temp = alloca i32, align 4
132
+ // store i32 2 i32* %temp, align 4
133
+ // call void @RecursiveFn.1(i32* nonnull %temp)
134
+ // ret void
135
+ // }
136
+ //
137
+ // Before a next iteration, we need to propagate the constant like so
138
+ // which allows further specialization in next iterations.
139
+ //
140
+ // @funcspec.arg = internal constant i32 2
141
+ //
142
+ // define internal void @someFunc(i32* arg1) {
143
+ // call void @otherFunc(i32* nonnull @funcspec.arg)
144
+ // ret void
145
+ // }
146
+ //
147
+ static void constantArgPropagation (SmallVectorImpl<Function *> &WorkList,
148
+ Module &M, SCCPSolver &Solver) {
149
+ // Iterate over the argument tracked functions see if there
150
+ // are any new constant values for the call instruction via
151
+ // stack variables.
152
+ for (auto *F : WorkList) {
153
+ // TODO: Generalize for any read only arguments.
154
+ if (F->arg_size () != 1 )
155
+ continue ;
156
+
157
+ auto &Arg = *F->arg_begin ();
158
+ if (!Arg.onlyReadsMemory () || !Arg.getType ()->isPointerTy ())
159
+ continue ;
160
+
161
+ for (auto *User : F->users ()) {
162
+ auto *Call = dyn_cast<CallInst>(User);
163
+ if (!Call)
164
+ break ;
165
+ auto *ArgOp = Call->getArgOperand (0 );
166
+ auto *ArgOpType = ArgOp->getType ();
167
+ auto *ConstVal = getConstantStackValue (Call, ArgOp, Solver);
168
+ if (!ConstVal)
169
+ break ;
170
+
171
+ Value *GV = new GlobalVariable (M, ConstVal->getType (), true ,
172
+ GlobalValue::InternalLinkage, ConstVal,
173
+ " funcspec.arg" );
174
+
175
+ if (ArgOpType != ConstVal->getType ())
176
+ GV = ConstantExpr::getBitCast (cast<Constant>(GV), ArgOp->getType ());
177
+
178
+ Call->setArgOperand (0 , GV);
179
+
180
+ // Add the changed CallInst to Solver Worklist
181
+ Solver.visitCall (*Call);
182
+ }
183
+ }
184
+ }
185
+
186
+ // ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics
187
+ // interfere with the constantArgPropagation optimization.
188
+ static void removeSSACopy (Function &F) {
189
+ for (BasicBlock &BB : F) {
190
+ for (BasicBlock::iterator BI = BB.begin (), E = BB.end (); BI != E;) {
191
+ Instruction *Inst = &*BI++;
192
+ auto *II = dyn_cast<IntrinsicInst>(Inst);
193
+ if (!II)
194
+ continue ;
195
+ if (II->getIntrinsicID () != Intrinsic::ssa_copy)
196
+ continue ;
197
+ Inst->replaceAllUsesWith (II->getOperand (0 ));
198
+ Inst->eraseFromParent ();
199
+ }
200
+ }
201
+ }
202
+
203
+ static void removeSSACopy (Module &M) {
204
+ for (Function &F : M)
205
+ removeSSACopy (F);
74
206
}
75
207
76
208
class FunctionSpecializer {
@@ -115,9 +247,14 @@ class FunctionSpecializer {
115
247
for (auto *SpecializedFunc : CurrentSpecializations) {
116
248
SpecializedFuncs.insert (SpecializedFunc);
117
249
118
- // TODO: If we want to support specializing specialized functions,
119
- // initialize here the state of the newly created functions, marking
120
- // them argument-tracked and executable.
250
+ // Initialize the state of the newly created functions, marking them
251
+ // argument-tracked and executable.
252
+ if (SpecializedFunc->hasExactDefinition () &&
253
+ !SpecializedFunc->hasFnAttribute (Attribute::Naked))
254
+ Solver.addTrackedFunction (SpecializedFunc);
255
+ Solver.addArgumentTrackedFunction (SpecializedFunc);
256
+ FuncDecls.push_back (SpecializedFunc);
257
+ Solver.markBlockExecutable (&SpecializedFunc->front ());
121
258
122
259
// Replace the function arguments for the specialized functions.
123
260
for (Argument &Arg : SpecializedFunc->args ())
@@ -138,12 +275,22 @@ class FunctionSpecializer {
138
275
const ValueLatticeElement &IV = Solver.getLatticeValueFor (V);
139
276
if (isOverdefined (IV))
140
277
return false ;
141
- auto *Const = IV. isConstant () ? Solver. getConstant (IV)
142
- : UndefValue::get (V->getType ());
278
+ auto *Const =
279
+ isConstant (IV) ? Solver. getConstant (IV) : UndefValue::get (V->getType ());
143
280
V->replaceAllUsesWith (Const);
144
281
145
- // TODO: Update the solver here if we want to specialize specialized
146
- // functions.
282
+ for (auto *U : Const->users ())
283
+ if (auto *I = dyn_cast<Instruction>(U))
284
+ if (Solver.isBlockExecutable (I->getParent ()))
285
+ Solver.visit (I);
286
+
287
+ // Remove the instruction from Block and Solver.
288
+ if (auto *I = dyn_cast<Instruction>(V)) {
289
+ if (I->isSafeToRemove ()) {
290
+ I->eraseFromParent ();
291
+ Solver.removeLatticeValueFor (I);
292
+ }
293
+ }
147
294
return true ;
148
295
}
149
296
@@ -152,6 +299,15 @@ class FunctionSpecializer {
152
299
// also in the cost model.
153
300
unsigned NbFunctionsSpecialized = 0 ;
154
301
302
+ // / Clone the function \p F and remove the ssa_copy intrinsics added by
303
+ // / the SCCPSolver in the cloned version.
304
+ Function *cloneCandidateFunction (Function *F) {
305
+ ValueToValueMapTy EmptyMap;
306
+ Function *Clone = CloneFunction (F, EmptyMap);
307
+ removeSSACopy (*Clone);
308
+ return Clone;
309
+ }
310
+
155
311
// / This function decides whether to specialize function \p F based on the
156
312
// / known constant values its arguments can take on. Specialization is
157
313
// / performed on the first interesting argument. Specializations based on
@@ -214,8 +370,7 @@ class FunctionSpecializer {
214
370
for (auto *C : Constants) {
215
371
// Clone the function. We leave the ValueToValueMap empty to allow
216
372
// IPSCCP to propagate the constant arguments.
217
- ValueToValueMapTy EmptyMap;
218
- Function *Clone = CloneFunction (F, EmptyMap);
373
+ Function *Clone = cloneCandidateFunction (F);
219
374
Argument *ClonedArg = Clone->arg_begin () + A.getArgNo ();
220
375
221
376
// Rewrite calls to the function so that they call the clone instead.
@@ -231,9 +386,10 @@ class FunctionSpecializer {
231
386
NbFunctionsSpecialized++;
232
387
}
233
388
234
- // TODO: if we want to support specialize specialized functions, and if
235
- // the function has been completely specialized, the original function is
236
- // no longer needed, so we would need to mark it unreachable here.
389
+ // If the function has been completely specialized, the original function
390
+ // is no longer needed. Mark it unreachable.
391
+ if (!IsPartial)
392
+ Solver.markFunctionUnreachable (F);
237
393
238
394
// FIXME: Only one argument per function.
239
395
return true ;
@@ -528,24 +684,6 @@ class FunctionSpecializer {
528
684
}
529
685
};
530
686
531
- /// Function to clean up the left over intrinsics from SCCP util: every ssa_copy intrinsic call in \p M is replaced by its operand 0 and then erased.
532
- static void cleanup (Module &M) {
533
- for (Function &F : M) {
534
- for (BasicBlock &BB : F) {
535
- for (BasicBlock::iterator BI = BB.begin (), E = BB.end (); BI != E;) {
536
- Instruction *Inst = &*BI++; // advance before Inst may be erased below
537
- if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
538
- if (II->getIntrinsicID () == Intrinsic::ssa_copy) {
539
- Value *Op = II->getOperand (0 );
540
- Inst->replaceAllUsesWith (Op);
541
- Inst->eraseFromParent ();
542
- }
543
- }
544
- }
545
- }
546
- }
547
- }
548
-
549
687
bool llvm::runFunctionSpecialization (
550
688
Module &M, const DataLayout &DL,
551
689
std::function<TargetLibraryInfo &(Function &)> GetTLI,
@@ -637,14 +775,18 @@ bool llvm::runFunctionSpecialization(
637
775
unsigned I = 0 ;
638
776
while (FuncSpecializationMaxIters != I++ &&
639
777
FS.specializeFunctions (FuncDecls, CurrentSpecializations)) {
640
- // TODO: run the solver here for the specialized functions only if we want
641
- // to specialize recursively.
778
+
779
+ // Run the solver for the specialized functions.
780
+ RunSCCPSolver (CurrentSpecializations);
781
+
782
+ // Replace some unresolved constant arguments
783
+ constantArgPropagation (FuncDecls, M, Solver);
642
784
643
785
CurrentSpecializations.clear ();
644
786
Changed = true ;
645
787
}
646
788
647
789
// Clean up the IR by removing ssa_copy intrinsics.
648
- cleanup (M);
790
+ removeSSACopy (M);
649
791
return Changed;
650
792
}
0 commit comments