Skip to content

Commit 292389a

Browse files
committed
[LICM] Fold associative binary ops to promote code hoisting
Perform the transformation "(LV op C1) op C2" ==> "LV op (C1 op C2)", where op is an associative binary op, LV is a loop-variant value, and C1 and C2 are loop-invariant values, so that (C1 op C2) can be hoisted into the loop preheader.
1 parent 1a41825 commit 292389a

File tree

3 files changed

+82
-4
lines changed

3 files changed

+82
-4
lines changed

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
113113
STATISTIC(NumIntAssociationsHoisted,
114114
"Number of invariant int expressions "
115115
"reassociated and hoisted out of the loop");
116+
STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
117+
"reassociated and hoisted out of the loop");
116118

117119
/// Memory promotion is enabled by default.
118120
static cl::opt<bool>
@@ -2779,6 +2781,75 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
27792781
return true;
27802782
}
27812783

2784+
/// Reassociate general associative binary expressions of the form
2785+
///
2786+
/// 1. "(LV op C1) op C2" ==> "LV op (C1 op C2)"
2787+
///
2788+
/// where op is an associative binary op, LV is a loop variant, and C1 and C2
2789+
/// are loop invariants.
2790+
///
2791+
/// TODO: This can be extended to more cases such as
2792+
/// 2. "C1 op (C2 op LV)" ==> "(C1 op C2) op LV"
2793+
/// 3. "(C1 op LV) op C2" ==> "LV op (C1 op C2)" if op is commutative
2794+
/// 4. "C1 op (LV op C2)" ==> "(C1 op C2) op LV" if op is commutative
2795+
static bool hoistBOAssociation(Instruction &I, Loop &L,
2796+
ICFLoopSafetyInfo &SafetyInfo,
2797+
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2798+
DominatorTree *DT) {
2799+
if (!isa<BinaryOperator>(I))
2800+
return false;
2801+
2802+
Instruction::BinaryOps Opcode = dyn_cast<BinaryOperator>(&I)->getOpcode();
2803+
BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
2804+
2805+
auto ClearSubclassDataAfterReassociation = [](Instruction &I) {
2806+
FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
2807+
if (!FPMO) {
2808+
I.clearSubclassOptionalData();
2809+
return;
2810+
}
2811+
2812+
FastMathFlags FMF = I.getFastMathFlags();
2813+
I.clearSubclassOptionalData();
2814+
I.setFastMathFlags(FMF);
2815+
};
2816+
2817+
if (I.isAssociative()) {
2818+
// Transform: "(LV op C1) op C2" ==> "LV op (C1 op C2)"
2819+
if (Op0 && Op0->getOpcode() == Opcode) {
2820+
Value *LV = Op0->getOperand(0);
2821+
Value *C1 = Op0->getOperand(1);
2822+
Value *C2 = I.getOperand(1);
2823+
2824+
if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) ||
2825+
!L.isLoopInvariant(C2))
2826+
return false;
2827+
2828+
bool singleUseOp0 = Op0->hasOneUse();
2829+
2830+
// Conservatively clear all optional flags since they may not be
2831+
// preserved by the reassociation, but preserve fast-math flags where
2832+
// applicable,
2833+
ClearSubclassDataAfterReassociation(I);
2834+
2835+
auto *Preheader = L.getLoopPreheader();
2836+
assert(Preheader && "Loop is not in simplify form?");
2837+
IRBuilder<> Builder(Preheader->getTerminator());
2838+
Value *V = Builder.CreateBinOp(Opcode, C1, C2, "invariant.op");
2839+
I.setOperand(0, LV);
2840+
I.setOperand(1, V);
2841+
2842+
// Note: (LV op CV1) might not be erased if it has more than one use.
2843+
if (singleUseOp0)
2844+
eraseInstruction(cast<Instruction>(*Op0), SafetyInfo, MSSAU);
2845+
2846+
return true;
2847+
}
2848+
}
2849+
2850+
return false;
2851+
}
2852+
27822853
static bool hoistArithmetics(Instruction &I, Loop &L,
27832854
ICFLoopSafetyInfo &SafetyInfo,
27842855
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2816,6 +2887,12 @@ static bool hoistArithmetics(Instruction &I, Loop &L,
28162887
return true;
28172888
}
28182889

2890+
if (hoistBOAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
2891+
++NumHoisted;
2892+
++NumBOAssociationsHoisted;
2893+
return true;
2894+
}
2895+
28192896
return false;
28202897
}
28212898

llvm/test/Transforms/LICM/hoist-binop.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,18 @@ define void @test1(i64 %n) {
1616
; CHECK-NEXT: [[VEC_INIT:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 1, i64 1
1717
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[VSCALE_2]], i64 0
1818
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
19+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
20+
; CHECK-NEXT: [[INVARIANT_OP1:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[DOTSPLAT]]
1921
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
2022
; CHECK: for.body:
2123
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[FOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ]
2224
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[VEC_INIT]], [[FOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_BODY]] ]
23-
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
2425
; CHECK-NEXT: [[ADD1:%.*]] = add nuw nsw <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
25-
; CHECK-NEXT: [[ADD2:%.*]] = add nuw nsw <vscale x 2 x i64> [[STEP_ADD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
26+
; CHECK-NEXT: [[ADD2:%.*]] = add <vscale x 2 x i64> [[VEC_IND]], [[INVARIANT_OP]]
2627
; CHECK-NEXT: call void @use(<vscale x 2 x i64> [[ADD1]])
2728
; CHECK-NEXT: call void @use(<vscale x 2 x i64> [[ADD2]])
2829
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[VSCALE_4]]
29-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[STEP_ADD]], [[DOTSPLAT]]
30+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[INVARIANT_OP1]]
3031
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
3132
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_BODY]]
3233
; CHECK: for.end:

llvm/test/Transforms/LICM/sink-foldable.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ define ptr @test2(i32 %j, ptr readonly %P, ptr readnone %Q) {
9797
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[ADD_PTR]], i64 [[IDX2_EXT]]
9898
; CHECK-NEXT: [[L1:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
9999
; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt ptr [[L1]], [[Q]]
100-
; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD_I]], 1
100+
; CHECK-NEXT: [[ADD]] = add i32 [[I_ADDR]], 2
101101
; CHECK-NEXT: br i1 [[CMP2]], label [[LOOPEXIT2:%.*]], label [[FOR_COND]]
102102
; CHECK: loopexit0:
103103
; CHECK-NEXT: [[P0:%.*]] = phi ptr [ null, [[FOR_COND]] ]

0 commit comments

Comments
 (0)