Skip to content

Commit c64e53b

Browse files
author
git apple-llvm automerger
committed
Merge commit 'd448848ca46d' from apple/stable/20210107 into swift/main
2 parents 0a8e04d + d448848 commit c64e53b

File tree

4 files changed

+204
-6
lines changed

4 files changed

+204
-6
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,8 @@ class SimplifyCFGOpt {
247247
bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
248248
IRBuilder<> &Builder);
249249

250-
bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI);
250+
bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI,
251+
bool EqTermsOnly);
251252
bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
252253
const TargetTransformInfo &TTI);
253254
bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
@@ -1348,9 +1349,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
13481349

13491350
/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
13501351
/// in the two blocks up into the branch block. The caller of this function
1351-
/// guarantees that BI's block dominates BB1 and BB2.
1352+
/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is true,
1353+
/// only perform hoisting in case both blocks only contain a terminator. In that
1354+
/// case, only the original BI will be replaced and selects for PHIs are added.
13521355
bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
1353-
const TargetTransformInfo &TTI) {
1356+
const TargetTransformInfo &TTI,
1357+
bool EqTermsOnly) {
13541358
// This does very trivial matching, with limited scanning, to find identical
13551359
// instructions in the two blocks. In particular, we don't want to get into
13561360
// O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
@@ -1387,6 +1391,16 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
13871391
++NumHoistCommonCode;
13881392
});
13891393

1394+
// Check if only hoisting terminators is allowed. This does not add new
1395+
// instructions to the hoist location.
1396+
if (EqTermsOnly) {
1397+
if (!I1->isIdenticalToWhenDefined(I2))
1398+
return false;
1399+
if (!I1->isTerminator())
1400+
return false;
1401+
goto HoistTerminator;
1402+
}
1403+
13901404
do {
13911405
// If we are hoisting the terminator instruction, don't move one (making a
13921406
// broken BB), instead clone it, and remove BI.
@@ -6498,9 +6512,9 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
64986512
// can hoist it up to the branching block.
64996513
if (BI->getSuccessor(0)->getSinglePredecessor()) {
65006514
if (BI->getSuccessor(1)->getSinglePredecessor()) {
6501-
if (HoistCommon && Options.HoistCommonInsts)
6502-
if (HoistThenElseCodeToIf(BI, TTI))
6503-
return requestResimplify();
6515+
if (HoistCommon &&
6516+
HoistThenElseCodeToIf(BI, TTI, !Options.HoistCommonInsts))
6517+
return requestResimplify();
65046518
} else {
65056519
// If Successor #1 has multiple preds, we may be able to conditionally
65066520
// execute Successor #0 if it branches to Successor #1.
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes='default<O3>' -S < %s | FileCheck %s
3+
4+
target triple = "arm64-apple-darwin"
5+
6+
; Make sure we can vectorize a loop that uses a function to clamp a double to
7+
; be between a given minimum and maximum value.
8+
9+
define internal double @clamp(double %v) {
10+
entry:
11+
%retval = alloca double, align 8
12+
%v.addr = alloca double, align 8
13+
store double %v, double* %v.addr, align 8
14+
%0 = load double, double* %v.addr, align 8
15+
%cmp = fcmp olt double %0, 0.000000e+00
16+
br i1 %cmp, label %if.then, label %if.end
17+
18+
if.then: ; preds = %entry
19+
store double 0.000000e+00, double* %retval, align 8
20+
br label %return
21+
22+
if.end: ; preds = %entry
23+
%1 = load double, double* %v.addr, align 8
24+
%cmp1 = fcmp ogt double %1, 6.000000e+00
25+
br i1 %cmp1, label %if.then2, label %if.end3
26+
27+
if.then2: ; preds = %if.end
28+
store double 6.000000e+00, double* %retval, align 8
29+
br label %return
30+
31+
if.end3: ; preds = %if.end
32+
%2 = load double, double* %v.addr, align 8
33+
store double %2, double* %retval, align 8
34+
br label %return
35+
36+
return: ; preds = %if.end3, %if.then2, %if.then
37+
%3 = load double, double* %retval, align 8
38+
ret double %3
39+
}
40+
41+
define void @loop(double* %X, double* %Y) {
42+
; CHECK-LABEL: @loop(
43+
; CHECK-NEXT: entry:
44+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[X:%.*]], i64 20000
45+
; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr double, double* [[Y:%.*]], i64 20000
46+
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[SCEVGEP9]], [[X]]
47+
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt double* [[SCEVGEP]], [[Y]]
48+
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
49+
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]]
50+
; CHECK: vector.body:
51+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
52+
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[INDEX]] to i64
53+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[TMP0]]
54+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>*
55+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !alias.scope !0
56+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2
57+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>*
58+
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8, !alias.scope !0
59+
; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer
60+
; CHECK-NEXT: [[TMP6:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD11]], zeroinitializer
61+
; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], <double 6.000000e+00, double 6.000000e+00>
62+
; CHECK-NEXT: [[TMP8:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD11]], <double 6.000000e+00, double 6.000000e+00>
63+
; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x double> <double 6.000000e+00, double 6.000000e+00>, <2 x double> [[WIDE_LOAD]]
64+
; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP8]], <2 x double> <double 6.000000e+00, double 6.000000e+00>, <2 x double> [[WIDE_LOAD11]]
65+
; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP5]], <2 x double> zeroinitializer, <2 x double> [[TMP9]]
66+
; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP6]], <2 x double> zeroinitializer, <2 x double> [[TMP10]]
67+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[TMP0]]
68+
; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to <2 x double>*
69+
; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP14]], align 8, !alias.scope !3, !noalias !0
70+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 2
71+
; CHECK-NEXT: [[TMP16:%.*]] = bitcast double* [[TMP15]] to <2 x double>*
72+
; CHECK-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP16]], align 8, !alias.scope !3, !noalias !0
73+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
74+
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000
75+
; CHECK-NEXT: br i1 [[TMP17]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
76+
; CHECK: for.cond.cleanup:
77+
; CHECK-NEXT: ret void
78+
; CHECK: for.body:
79+
; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
80+
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_05]] to i64
81+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[IDXPROM]]
82+
; CHECK-NEXT: [[TMP18:%.*]] = load double, double* [[ARRAYIDX]], align 8
83+
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt double [[TMP18]], 0.000000e+00
84+
; CHECK-NEXT: [[CMP1_I:%.*]] = fcmp ogt double [[TMP18]], 6.000000e+00
85+
; CHECK-NEXT: [[DOTV_I:%.*]] = select i1 [[CMP1_I]], double 6.000000e+00, double [[TMP18]]
86+
; CHECK-NEXT: [[RETVAL_0_I:%.*]] = select i1 [[CMP_I]], double 0.000000e+00, double [[DOTV_I]]
87+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[IDXPROM]]
88+
; CHECK-NEXT: store double [[RETVAL_0_I]], double* [[ARRAYIDX2]], align 8
89+
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_05]], 1
90+
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_05]], 19999
91+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP7:![0-9]+]]
92+
;
93+
entry:
94+
%X.addr = alloca double*, align 8
95+
%Y.addr = alloca double*, align 8
96+
%i = alloca i32, align 4
97+
store double* %X, double** %X.addr, align 8
98+
store double* %Y, double** %Y.addr, align 8
99+
%0 = bitcast i32* %i to i8*
100+
call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #2
101+
store i32 0, i32* %i, align 4
102+
br label %for.cond
103+
104+
for.cond: ; preds = %for.inc, %entry
105+
%1 = load i32, i32* %i, align 4
106+
%cmp = icmp ult i32 %1, 20000
107+
br i1 %cmp, label %for.body, label %for.cond.cleanup
108+
109+
for.cond.cleanup: ; preds = %for.cond
110+
%2 = bitcast i32* %i to i8*
111+
call void @llvm.lifetime.end.p0i8(i64 4, i8* %2) #2
112+
br label %for.end
113+
114+
for.body: ; preds = %for.cond
115+
%3 = load double*, double** %Y.addr, align 8
116+
%4 = load i32, i32* %i, align 4
117+
%idxprom = zext i32 %4 to i64
118+
%arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom
119+
%5 = load double, double* %arrayidx, align 8
120+
%call = call double @clamp(double %5)
121+
%6 = load double*, double** %X.addr, align 8
122+
%7 = load i32, i32* %i, align 4
123+
%idxprom1 = zext i32 %7 to i64
124+
%arrayidx2 = getelementptr inbounds double, double* %6, i64 %idxprom1
125+
store double %call, double* %arrayidx2, align 8
126+
br label %for.inc
127+
128+
for.inc: ; preds = %for.body
129+
%8 = load i32, i32* %i, align 4
130+
%inc = add i32 %8, 1
131+
store i32 %inc, i32* %i, align 4
132+
br label %for.cond
133+
134+
for.end: ; preds = %for.cond.cleanup
135+
ret void
136+
}
137+
138+
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
139+
140+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
if not 'AArch64' in config.root.targets:
2+
config.unsupported = True

llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,45 @@ for.end:
108108
return:
109109
ret void
110110
}
111+
112+
; An example where only the branch instructions from %if.then2 and %if.end3 need
113+
; to be hoisted, which effectively replaces the original branch in %if.end and
114+
; only requires selects for PHIs in the successor.
115+
define float @clamp_float_value(float %value, float %minimum_value, float %maximum_value) {
116+
; HOIST-LABEL: @clamp_float_value(
117+
; HOIST-NEXT: entry:
118+
; HOIST-NEXT: [[CMP:%.*]] = fcmp ogt float [[VALUE:%.*]], [[MAXIMUM_VALUE:%.*]]
119+
; HOIST-NEXT: [[CMP1:%.*]] = fcmp olt float [[VALUE]], [[MINIMUM_VALUE:%.*]]
120+
; HOIST-NEXT: [[MINIMUM_VALUE_VALUE:%.*]] = select i1 [[CMP1]], float [[MINIMUM_VALUE]], float [[VALUE]]
121+
; HOIST-NEXT: [[RETVAL_0:%.*]] = select i1 [[CMP]], float [[MAXIMUM_VALUE]], float [[MINIMUM_VALUE_VALUE]]
122+
; HOIST-NEXT: ret float [[RETVAL_0]]
123+
;
124+
; NOHOIST-LABEL: @clamp_float_value(
125+
; NOHOIST-NEXT: entry:
126+
; NOHOIST-NEXT: [[CMP:%.*]] = fcmp ogt float [[VALUE:%.*]], [[MAXIMUM_VALUE:%.*]]
127+
; NOHOIST-NEXT: [[CMP1:%.*]] = fcmp olt float [[VALUE]], [[MINIMUM_VALUE:%.*]]
128+
; NOHOIST-NEXT: [[MINIMUM_VALUE_VALUE:%.*]] = select i1 [[CMP1]], float [[MINIMUM_VALUE]], float [[VALUE]]
129+
; NOHOIST-NEXT: [[RETVAL_0:%.*]] = select i1 [[CMP]], float [[MAXIMUM_VALUE]], float [[MINIMUM_VALUE_VALUE]]
130+
; NOHOIST-NEXT: ret float [[RETVAL_0]]
131+
;
132+
entry:
133+
%cmp = fcmp ogt float %value, %maximum_value
134+
br i1 %cmp, label %if.then, label %if.end
135+
136+
if.then: ; preds = %entry
137+
br label %return
138+
139+
if.end: ; preds = %entry
140+
%cmp1 = fcmp olt float %value, %minimum_value
141+
br i1 %cmp1, label %if.then2, label %if.end3
142+
143+
if.then2: ; preds = %if.end
144+
br label %return
145+
146+
if.end3: ; preds = %if.end
147+
br label %return
148+
149+
return: ; preds = %if.end3, %if.then2, %if.then
150+
%retval.0 = phi float [ %maximum_value, %if.then ], [ %minimum_value, %if.then2 ], [ %value, %if.end3 ]
151+
ret float %retval.0
152+
}

0 commit comments

Comments
 (0)