Skip to content

Commit 8698d56

Browse files
committed
[Transforms][LICM] Add the ability to undo unprofitable reassociation
Consider the following piece of code: ``` void innermost_loop(int i, double d1, double d2, double delta, int n, double cells[n]) { int j; const double d1d = d1 * delta; const double d2d = d2 * delta; for (j = 0; j <= i; j++) cells[j] = d1d * cells[j + 1] + d2d * cells[j]; } ``` When compiling at the -Ofast level, after the "Reassociate expressions" pass, this code is transformed into an equivalent of: ``` int j; for (j = 0; j <= i; j++) cells[j] = (d1 * cells[j + 1] + d2 * cells[j]) * delta; ``` Effectively, the computation of those loop invariants isn't done before the loop anymore; instead, we have one extra multiplication on each loop iteration. Sadly, this results in a significant performance hit. Similarly, user code that is written directly in this form cannot have those invariants hoisted either. This patch solves this issue by adding the ability to undo such reassociation to the LICM pass. Note that to perform this transformation, the pass requires the same conditions as the "Reassociate expressions" pass, namely, the involved binary operators must have reassociation allowed (e.g. by specifying the `fast` attribute) and each of them must have only a single use. Some parts of this patch were suggested by Nikita Popov. Reviewed By: huntergr, nikic, paulwalker-arm Differential Revision: https://reviews.llvm.org/D152281
1 parent 89e25a3 commit 8698d56

File tree

2 files changed

+524
-20
lines changed

2 files changed

+524
-20
lines changed

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ STATISTIC(NumGEPsHoisted,
108108
"Number of geps reassociated and hoisted out of the loop");
109109
STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
110110
"and hoisted out of the loop");
111+
// Bumped by hoistArithmetics() once for every instruction on which
// hoistFPAssociation() succeeds.
STATISTIC(NumFPAssociationsHoisted,
          "Number of invariant FP expressions reassociated and hoisted out of "
          "the loop");
111113

112114
/// Memory promotion is enabled by default.
113115
static cl::opt<bool>
@@ -127,6 +129,12 @@ static cl::opt<uint32_t> MaxNumUsesTraversed(
127129
cl::desc("Max num uses visited for identifying load "
128130
"invariance in loop using invariant start (default = 8)"));
129131

132+
/// Hidden command-line knob (default 5) capping how many invariant operand
/// rewrites hoistFPAssociation() may collect for a single candidate
/// expression; once the cap is exceeded the transformation is abandoned.
///
/// Declared `static` because the option is only consulted inside this file;
/// internal linkage keeps the symbol out of the global namespace and matches
/// the other `static cl::opt` definitions next to it.
static cl::opt<unsigned> FPAssociationUpperLimit(
    "licm-max-num-fp-reassociations", cl::init(5U), cl::Hidden,
    cl::desc(
        "Set upper limit for the number of transformations performed "
        "during a single round of hoisting the reassociated expressions."));
137+
130138
// Experimental option to allow imprecision in LICM in pathological cases, in
131139
// exchange for faster compile. This is to be removed if MemorySSA starts to
132140
// address the same issue. LICM calls MemorySSAWalker's
@@ -2674,6 +2682,72 @@ static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
26742682
return false;
26752683
}
26762684

2685+
/// Try to reassociate expressions like ((A1 * B1) + (A2 * B2) + ...) * C where
2686+
/// A1, A2, ... and C are loop invariants into expressions like
2687+
/// ((A1 * C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ...
2688+
/// invariant expressions. This functions returns true only if any hoisting has
2689+
/// actually occured.
2690+
static bool hoistFPAssociation(Instruction &I, Loop &L,
2691+
ICFLoopSafetyInfo &SafetyInfo,
2692+
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2693+
DominatorTree *DT) {
2694+
using namespace PatternMatch;
2695+
Value *VariantOp = nullptr, *InvariantOp = nullptr;
2696+
2697+
if (!match(&I, m_FMul(m_Value(VariantOp), m_Value(InvariantOp))) ||
2698+
!I.hasAllowReassoc())
2699+
return false;
2700+
if (L.isLoopInvariant(VariantOp))
2701+
std::swap(VariantOp, InvariantOp);
2702+
if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
2703+
return false;
2704+
Value *Factor = InvariantOp;
2705+
2706+
// First, we need to make sure we should do the transformation.
2707+
SmallVector<Use *> Changes;
2708+
SmallVector<BinaryOperator *> Worklist;
2709+
if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(VariantOp))
2710+
Worklist.push_back(VariantBinOp);
2711+
while (!Worklist.empty()) {
2712+
BinaryOperator *BO = Worklist.pop_back_val();
2713+
if (!BO->hasOneUse() || !BO->hasAllowReassoc())
2714+
return false;
2715+
BinaryOperator *Op0, *Op1;
2716+
if (match(BO, m_FAdd(m_BinOp(Op0), m_BinOp(Op1)))) {
2717+
Worklist.push_back(Op0);
2718+
Worklist.push_back(Op1);
2719+
continue;
2720+
}
2721+
if (BO->getOpcode() != Instruction::FMul || L.isLoopInvariant(BO))
2722+
return false;
2723+
Use &U0 = BO->getOperandUse(0);
2724+
Use &U1 = BO->getOperandUse(1);
2725+
if (L.isLoopInvariant(U0))
2726+
Changes.push_back(&U0);
2727+
else if (L.isLoopInvariant(U1))
2728+
Changes.push_back(&U1);
2729+
else
2730+
return false;
2731+
if (Changes.size() > FPAssociationUpperLimit)
2732+
return false;
2733+
}
2734+
if (Changes.empty())
2735+
return false;
2736+
2737+
// We know we should do it so let's do the transformation.
2738+
auto *Preheader = L.getLoopPreheader();
2739+
assert(Preheader && "Loop is not in simplify form?");
2740+
IRBuilder<> Builder(Preheader->getTerminator());
2741+
for (auto *U : Changes) {
2742+
assert(L.isLoopInvariant(U->get()));
2743+
Instruction *Ins = cast<Instruction>(U->getUser());
2744+
U->set(Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul"));
2745+
}
2746+
I.replaceAllUsesWith(VariantOp);
2747+
eraseInstruction(I, SafetyInfo, MSSAU);
2748+
return true;
2749+
}
2750+
26772751
static bool hoistArithmetics(Instruction &I, Loop &L,
26782752
ICFLoopSafetyInfo &SafetyInfo,
26792753
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2701,6 +2775,12 @@ static bool hoistArithmetics(Instruction &I, Loop &L,
27012775
return true;
27022776
}
27032777

2778+
if (hoistFPAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
2779+
++NumHoisted;
2780+
++NumFPAssociationsHoisted;
2781+
return true;
2782+
}
2783+
27042784
return false;
27052785
}
27062786

0 commit comments

Comments
 (0)