@@ -101,29 +101,21 @@ static cl::opt<bool> SpecializeLiteralConstant(
101
101
" Enable specialization of functions that take a literal constant as an "
102
102
" argument" ));
103
103
104
- // Estimates the instruction cost of all the basic blocks in \p WorkList.
105
- // The successors of such blocks are added to the list as long as they are
106
- // executable and they have a unique predecessor. \p WorkList represents
107
- // the basic blocks of a specialization which become dead once we replace
108
- // instructions that are known to be constants. The aim here is to estimate
109
- // the combination of size and latency savings in comparison to the non
110
- // specialized version of the function.
104
+ // Estimates the codesize savings due to dead code after constant propagation.
105
+ // \p WorkList represents the basic blocks of a specialization which will
106
+ // eventually become dead once we replace instructions that are known to be
107
+ // constants. The successors of such blocks are added to the list as long as
108
+ // the \p Solver found they were executable prior to specialization, and only
109
+ // if they have a unique predecessor.
111
110
static Cost estimateBasicBlocks (SmallVectorImpl<BasicBlock *> &WorkList,
112
111
DenseSet<BasicBlock *> &DeadBlocks,
113
112
ConstMap &KnownConstants, SCCPSolver &Solver,
114
- BlockFrequencyInfo &BFI,
115
113
TargetTransformInfo &TTI) {
116
- Cost Bonus = 0 ;
117
-
114
+ Cost CodeSize = 0 ;
118
115
// Accumulate the code size cost of the instructions in each basic block.
119
116
while (!WorkList.empty ()) {
120
117
BasicBlock *BB = WorkList.pop_back_val ();
121
118
122
- uint64_t Weight = BFI.getBlockFreq (BB).getFrequency () /
123
- BFI.getEntryFreq ();
124
- if (!Weight)
125
- continue ;
126
-
127
119
// These blocks are considered dead as far as the InstCostVisitor
128
120
// is concerned. They haven't been proven dead yet by the Solver,
129
121
// but may become dead if we propagate the specialization arguments.
@@ -139,11 +131,11 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
139
131
if (KnownConstants.contains (&I))
140
132
continue ;
141
133
142
- Bonus += Weight *
143
- TTI.getInstructionCost (&I, TargetTransformInfo::TCK_SizeAndLatency);
134
+ Cost C = TTI.getInstructionCost (&I, TargetTransformInfo::TCK_CodeSize);
144
135
145
- LLVM_DEBUG (dbgs () << " FnSpecialization: Bonus " << Bonus
146
- << " after user " << I << " \n " );
136
+ LLVM_DEBUG (dbgs () << " FnSpecialization: CodeSize " << C
137
+ << " for user " << I << " \n " );
138
+ CodeSize += C;
147
139
}
148
140
149
141
// Keep adding dead successors to the list as long as they are
@@ -153,7 +145,7 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
153
145
SuccBB->getUniquePredecessor () == BB)
154
146
WorkList.push_back (SuccBB);
155
147
}
156
- return Bonus ;
148
+ return CodeSize ;
157
149
}
158
150
159
151
static Constant *findConstantFor (Value *V, ConstMap &KnownConstants) {
@@ -164,55 +156,57 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
164
156
return nullptr ;
165
157
}
166
158
167
- Cost InstCostVisitor::getBonusFromPendingPHIs () {
168
- Cost Bonus = 0 ;
159
+ Bonus InstCostVisitor::getBonusFromPendingPHIs () {
160
+ Bonus B ;
169
161
while (!PendingPHIs.empty ()) {
170
162
Instruction *Phi = PendingPHIs.pop_back_val ();
171
163
// The pending PHIs could have been proven dead by now.
172
164
if (isBlockExecutable (Phi->getParent ()))
173
- Bonus += getUserBonus (Phi);
165
+ B += getUserBonus (Phi);
174
166
}
175
- return Bonus ;
167
+ return B ;
176
168
}
177
169
178
- Cost InstCostVisitor::getUserBonus (Instruction *User, Value *Use, Constant *C) {
170
+ Bonus InstCostVisitor::getUserBonus (Instruction *User, Value *Use, Constant *C) {
179
171
// We have already propagated a constant for this user.
180
172
if (KnownConstants.contains (User))
181
- return 0 ;
173
+ return { 0 , 0 } ;
182
174
183
175
// Cache the iterator before visiting.
184
176
LastVisited = Use ? KnownConstants.insert ({Use, C}).first
185
177
: KnownConstants.end ();
186
178
187
- if (auto *I = dyn_cast<SwitchInst>(User))
188
- return estimateSwitchInst (*I);
189
-
190
- if (auto *I = dyn_cast<BranchInst>(User))
191
- return estimateBranchInst (*I);
192
-
193
- C = visit (*User);
194
- if (!C)
195
- return 0 ;
179
+ Cost CodeSize = 0 ;
180
+ if (auto *I = dyn_cast<SwitchInst>(User)) {
181
+ CodeSize = estimateSwitchInst (*I);
182
+ } else if (auto *I = dyn_cast<BranchInst>(User)) {
183
+ CodeSize = estimateBranchInst (*I);
184
+ } else {
185
+ C = visit (*User);
186
+ if (!C)
187
+ return {0 , 0 };
188
+ KnownConstants.insert ({User, C});
189
+ }
196
190
197
- KnownConstants. insert ({ User, C} );
191
+ CodeSize += TTI. getInstructionCost ( User, TargetTransformInfo::TCK_CodeSize );
198
192
199
193
uint64_t Weight = BFI.getBlockFreq (User->getParent ()).getFrequency () /
200
194
BFI.getEntryFreq ();
201
- if (!Weight)
202
- return 0 ;
203
195
204
- Cost Bonus = Weight *
205
- TTI.getInstructionCost (User, TargetTransformInfo::TCK_SizeAndLatency );
196
+ Cost Latency = Weight *
197
+ TTI.getInstructionCost (User, TargetTransformInfo::TCK_Latency );
206
198
207
- LLVM_DEBUG (dbgs () << " FnSpecialization: Bonus " << Bonus
208
- << " for user " << *User << " \n " );
199
+ LLVM_DEBUG (dbgs () << " FnSpecialization: {CodeSize = " << CodeSize
200
+ << " , Latency = " << Latency << " } for user "
201
+ << *User << " \n " );
209
202
203
+ Bonus B (CodeSize, Latency);
210
204
for (auto *U : User->users ())
211
205
if (auto *UI = dyn_cast<Instruction>(U))
212
206
if (UI != User && isBlockExecutable (UI->getParent ()))
213
- Bonus += getUserBonus (UI, User, C);
207
+ B += getUserBonus (UI, User, C);
214
208
215
- return Bonus ;
209
+ return B ;
216
210
}
217
211
218
212
Cost InstCostVisitor::estimateSwitchInst (SwitchInst &I) {
@@ -238,8 +232,7 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
238
232
WorkList.push_back (BB);
239
233
}
240
234
241
- return estimateBasicBlocks (WorkList, DeadBlocks, KnownConstants, Solver, BFI,
242
- TTI);
235
+ return estimateBasicBlocks (WorkList, DeadBlocks, KnownConstants, Solver, TTI);
243
236
}
244
237
245
238
Cost InstCostVisitor::estimateBranchInst (BranchInst &I) {
@@ -256,8 +249,7 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
256
249
Succ->getUniquePredecessor () == I.getParent ())
257
250
WorkList.push_back (Succ);
258
251
259
- return estimateBasicBlocks (WorkList, DeadBlocks, KnownConstants, Solver, BFI,
260
- TTI);
252
+ return estimateBasicBlocks (WorkList, DeadBlocks, KnownConstants, Solver, TTI);
261
253
}
262
254
263
255
Constant *InstCostVisitor::visitPHINode (PHINode &I) {
@@ -578,13 +570,18 @@ bool FunctionSpecializer::run() {
578
570
if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant)
579
571
continue ;
580
572
573
+ int64_t Sz = *Metrics.NumInsts .getValue ();
574
+ assert (Sz > 0 && " CodeSize should be positive" );
575
+ // It is safe to downcast from int64_t because NumInsts is always positive.
576
+ unsigned SpecCost = static_cast <unsigned >(Sz);
577
+
581
578
LLVM_DEBUG (dbgs () << " FnSpecialization: Specialization cost for "
582
- << F.getName () << " is " << Metrics. NumInsts << " \n " );
579
+ << F.getName () << " is " << SpecCost << " \n " );
583
580
584
581
if (Inserted && Metrics.isRecursive )
585
582
promoteConstantStackValues (&F);
586
583
587
- if (!findSpecializations (&F, Metrics. NumInsts , AllSpecs, SM)) {
584
+ if (!findSpecializations (&F, SpecCost , AllSpecs, SM)) {
588
585
LLVM_DEBUG (
589
586
dbgs () << " FnSpecialization: No possible specializations found for "
590
587
<< F.getName () << " \n " );
@@ -719,7 +716,7 @@ static Function *cloneCandidateFunction(Function *F) {
719
716
return Clone;
720
717
}
721
718
722
- bool FunctionSpecializer::findSpecializations (Function *F, Cost SpecCost,
719
+ bool FunctionSpecializer::findSpecializations (Function *F, unsigned SpecCost,
723
720
SmallVectorImpl<Spec> &AllSpecs,
724
721
SpecMap &SM) {
725
722
// A mapping from a specialisation signature to the index of the respective
@@ -785,21 +782,22 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
785
782
AllSpecs[Index].CallSites .push_back (&CS);
786
783
} else {
787
784
// Calculate the specialisation gain.
788
- Cost Score = 0 ;
785
+ Bonus B ;
789
786
InstCostVisitor Visitor = getInstCostVisitorFor (F);
790
787
for (ArgInfo &A : S.Args )
791
- Score += getSpecializationBonus (A.Formal , A.Actual , Visitor);
792
- Score += Visitor.getBonusFromPendingPHIs ();
788
+ B += getSpecializationBonus (A.Formal , A.Actual , Visitor);
789
+ B += Visitor.getBonusFromPendingPHIs ();
793
790
794
- LLVM_DEBUG (dbgs () << " FnSpecialization: Specialization score = "
795
- << Score << " \n " );
791
+ LLVM_DEBUG (dbgs () << " FnSpecialization: Specialization score {CodeSize = "
792
+ << B.CodeSize << " , Latency = " << B.Latency
793
+ << " }\n " );
796
794
797
795
// Discard unprofitable specialisations.
798
- if (!ForceSpecialization && Score <= SpecCost)
796
+ if (!ForceSpecialization && B. Latency <= SpecCost - B. CodeSize )
799
797
continue ;
800
798
801
799
// Create a new specialisation entry.
802
- auto &Spec = AllSpecs.emplace_back (F, S, Score );
800
+ auto &Spec = AllSpecs.emplace_back (F, S, B. Latency );
803
801
if (CS.getFunction () != F)
804
802
Spec.CallSites .push_back (&CS);
805
803
const unsigned Index = AllSpecs.size () - 1 ;
@@ -866,19 +864,20 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
866
864
}
867
865
868
866
// / Compute a bonus for replacing argument \p A with constant \p C.
869
- Cost FunctionSpecializer::getSpecializationBonus (Argument *A, Constant *C,
867
+ Bonus FunctionSpecializer::getSpecializationBonus (Argument *A, Constant *C,
870
868
InstCostVisitor &Visitor) {
871
869
LLVM_DEBUG (dbgs () << " FnSpecialization: Analysing bonus for constant: "
872
870
<< C->getNameOrAsOperand () << " \n " );
873
871
874
- Cost TotalCost = 0 ;
872
+ Bonus B ;
875
873
for (auto *U : A->users ())
876
874
if (auto *UI = dyn_cast<Instruction>(U))
877
875
if (Visitor.isBlockExecutable (UI->getParent ()))
878
- TotalCost += Visitor.getUserBonus (UI, A, C);
876
+ B += Visitor.getUserBonus (UI, A, C);
879
877
880
- LLVM_DEBUG (dbgs () << " FnSpecialization: Accumulated user bonus "
881
- << TotalCost << " for argument " << *A << " \n " );
878
+ LLVM_DEBUG (dbgs () << " FnSpecialization: Accumulated bonus {CodeSize = "
879
+ << B.CodeSize << " , Latency = " << B.Latency
880
+ << " } for argument " << *A << " \n " );
882
881
883
882
// The below heuristic is only concerned with exposing inlining
884
883
// opportunities via indirect call promotion. If the argument is not a
@@ -888,7 +887,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
888
887
// while traversing the users of the specialization arguments ?
889
888
Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts ());
890
889
if (!CalledFunction)
891
- return TotalCost ;
890
+ return B ;
892
891
893
892
// Get TTI for the called function (used for the inline cost).
894
893
auto &CalleeTTI = (GetTTI)(*CalledFunction);
@@ -898,7 +897,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
898
897
// calls to be promoted to direct calls. If the indirect call promotion
899
898
// would likely enable the called function to be inlined, specializing is a
900
899
// good idea.
901
- int Bonus = 0 ;
900
+ int InliningBonus = 0 ;
902
901
for (User *U : A->users ()) {
903
902
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
904
903
continue ;
@@ -925,15 +924,15 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
925
924
// We clamp the bonus for this call to be between zero and the default
926
925
// threshold.
927
926
if (IC.isAlways ())
928
- Bonus += Params.DefaultThreshold ;
927
+ InliningBonus += Params.DefaultThreshold ;
929
928
else if (IC.isVariable () && IC.getCostDelta () > 0 )
930
- Bonus += IC.getCostDelta ();
929
+ InliningBonus += IC.getCostDelta ();
931
930
932
- LLVM_DEBUG (dbgs () << " FnSpecialization: Inlining bonus " << Bonus
931
+ LLVM_DEBUG (dbgs () << " FnSpecialization: Inlining bonus " << InliningBonus
933
932
<< " for user " << *U << " \n " );
934
933
}
935
934
936
- return TotalCost + Bonus ;
935
+ return B += { 0 , InliningBonus} ;
937
936
}
938
937
939
938
// / Determine if it is possible to specialise the function for constant values
0 commit comments