Skip to content

Commit 5f0d0e6

Browse files
committed
GVN-hoist: fix hoistingFromAllPaths for loops (PR29034)
It is invalid to hoist stores or loads if they are not executed on all paths from the hoisting point to the exit of the function. In the testcase, there are paths in the loop that do not execute the stores or the loads, and so hoisting them within the loop is unsafe. The problem is that the current implementation of hoistingFromAllPaths is incomplete: it walks all blocks dominated by the hoisting point, and does not return false when the loop contains a path on which the hoisted ld/st is not executed. Differential Revision: https://reviews.llvm.org/D23843 llvm-svn: 279732
1 parent 331fb80 commit 5f0d0e6

File tree

3 files changed

+219
-31
lines changed

3 files changed

+219
-31
lines changed

llvm/lib/Transforms/Scalar/GVNHoist.cpp

Lines changed: 46 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -273,24 +273,32 @@ class GVNHoist {
273273
return false;
274274
}
275275

276-
// Return true when all paths from A to the end of the function pass through
277-
// either B or C.
278-
bool hoistingFromAllPaths(const BasicBlock *A, const BasicBlock *B,
279-
const BasicBlock *C) {
280-
// We fully copy the WL in order to be able to remove items from it.
281-
SmallPtrSet<const BasicBlock *, 2> WL;
282-
WL.insert(B);
283-
WL.insert(C);
284-
285-
for (auto It = df_begin(A), E = df_end(A); It != E;) {
286-
// There exists a path from A to the exit of the function if we are still
287-
// iterating in DF traversal and we removed all instructions from the work
288-
// list.
289-
if (WL.empty())
276+
// Return true when a successor of BB dominates A.
277+
bool successorDominate(const BasicBlock *BB, const BasicBlock *A) {
278+
for (const BasicBlock *Succ : BB->getTerminator()->successors())
279+
if (DT->dominates(Succ, A))
280+
return true;
281+
282+
return false;
283+
}
284+
285+
// Return true when all paths from HoistBB to the end of the function pass
286+
// through one of the blocks in WL.
287+
bool hoistingFromAllPaths(const BasicBlock *HoistBB,
288+
SmallPtrSetImpl<const BasicBlock *> &WL) {
289+
290+
// Copy WL as the loop will remove elements from it.
291+
SmallPtrSet<const BasicBlock *, 2> WorkList(WL.begin(), WL.end());
292+
293+
for (auto It = df_begin(HoistBB), E = df_end(HoistBB); It != E;) {
294+
// There exists a path from HoistBB to the exit of the function if we are
295+
// still iterating in DF traversal and we removed all instructions from
296+
// the work list.
297+
if (WorkList.empty())
290298
return false;
291299

292300
const BasicBlock *BB = *It;
293-
if (WL.erase(BB)) {
301+
if (WorkList.erase(BB)) {
294302
// Stop DFS traversal when BB is in the work list.
295303
It.skipChildren();
296304
continue;
@@ -300,6 +308,11 @@ class GVNHoist {
300308
if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator()))
301309
return false;
302310

311+
// When reaching the back-edge of a loop, there may be a path through the
312+
// loop that does not pass through B or C before exiting the loop.
313+
if (successorDominate(BB, HoistBB))
314+
return false;
315+
303316
// Increment DFS traversal when not skipping children.
304317
++It;
305318
}
@@ -476,23 +489,21 @@ class GVNHoist {
476489
return true;
477490
}
478491

479-
// Return true when it is safe to hoist scalar instructions from BB1 and BB2
480-
// to HoistBB.
481-
bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB1,
482-
const BasicBlock *BB2, int &NBBsOnAllPaths) {
483-
// Check that the hoisted expression is needed on all paths. When HoistBB
484-
// already contains an instruction to be hoisted, the expression is needed
485-
// on all paths. Enable scalar hoisting at -Oz as it is safe to hoist
486-
// scalars to a place where they are partially needed.
487-
if (!OptForMinSize && BB1 != HoistBB &&
488-
!hoistingFromAllPaths(HoistBB, BB1, BB2))
492+
// Return true when it is safe to hoist scalar instructions from all blocks in
493+
// WL to HoistBB.
494+
bool safeToHoistScalar(const BasicBlock *HoistBB,
495+
SmallPtrSetImpl<const BasicBlock *> &WL,
496+
int &NBBsOnAllPaths) {
497+
// Check that the hoisted expression is needed on all paths. Enable scalar
498+
// hoisting at -Oz as it is safe to hoist scalars to a place where they are
499+
// partially needed.
500+
if (!OptForMinSize && !hoistingFromAllPaths(HoistBB, WL))
489501
return false;
490502

491-
if (hasEHOnPath(HoistBB, BB1, NBBsOnAllPaths) ||
492-
hasEHOnPath(HoistBB, BB2, NBBsOnAllPaths))
493-
return false;
503+
for (const BasicBlock *BB : WL)
504+
if (hasEHOnPath(HoistBB, BB, NBBsOnAllPaths))
505+
return false;
494506

495-
// Safe to hoist scalars from BB1 and BB2 to HoistBB.
496507
return true;
497508
}
498509

@@ -542,8 +553,12 @@ class GVNHoist {
542553
NewHoistPt = NewHoistBB->getTerminator();
543554
}
544555

556+
SmallPtrSet<const BasicBlock *, 2> WL;
557+
WL.insert(HoistBB);
558+
WL.insert(BB);
559+
545560
if (K == InsKind::Scalar) {
546-
if (safeToHoistScalar(NewHoistBB, HoistBB, BB, NBBsOnAllPaths)) {
561+
if (safeToHoistScalar(NewHoistBB, WL, NBBsOnAllPaths)) {
547562
// Extend HoistPt to NewHoistPt.
548563
HoistPt = NewHoistPt;
549564
HoistBB = NewHoistBB;
@@ -557,7 +572,7 @@ class GVNHoist {
557572
// loading from the same address: for instance there may be a branch on
558573
// which the address of the load may not be initialized.
559574
if ((HoistBB == NewHoistBB || BB == NewHoistBB ||
560-
hoistingFromAllPaths(NewHoistBB, HoistBB, BB)) &&
575+
hoistingFromAllPaths(NewHoistBB, WL)) &&
561576
// Also check that it is safe to move the load or store from HoistPt
562577
// to NewHoistPt, and from Insn to NewHoistPt.
563578
safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NBBsOnAllPaths) &&
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; RUN: opt -S -gvn-hoist < %s | FileCheck %s
2+
3+
; Check that the stores are not hoisted: it is invalid to hoist stores if they
4+
; are not executed on all paths. In this testcase, there are paths in the loop
5+
; that do not execute the stores.
6+
7+
; CHECK-LABEL: define i32 @main
8+
; CHECK: store
9+
; CHECK: store
10+
; CHECK: store
11+
12+
@a = global i32 0, align 4
13+
14+
define i32 @main() {
15+
entry:
16+
br label %for.cond
17+
18+
for.cond: ; preds = %for.inc5, %entry
19+
%0 = load i32, i32* @a, align 4
20+
%cmp = icmp slt i32 %0, 1
21+
br i1 %cmp, label %for.cond1, label %for.end7
22+
23+
for.cond1: ; preds = %for.cond, %for.inc
24+
%1 = load i32, i32* @a, align 4
25+
%cmp2 = icmp slt i32 %1, 1
26+
br i1 %cmp2, label %for.body3, label %for.inc5
27+
28+
for.body3: ; preds = %for.cond1
29+
%tobool = icmp ne i32 %1, 0
30+
br i1 %tobool, label %if.then, label %for.inc
31+
32+
if.then: ; preds = %for.body3
33+
%inc = add nsw i32 %1, 1
34+
store i32 %inc, i32* @a, align 4
35+
br label %for.inc
36+
37+
for.inc: ; preds = %for.body3, %if.then
38+
%2 = load i32, i32* @a, align 4
39+
%inc4 = add nsw i32 %2, 1
40+
store i32 %inc4, i32* @a, align 4
41+
br label %for.cond1
42+
43+
for.inc5: ; preds = %for.cond1
44+
%inc6 = add nsw i32 %1, 1
45+
store i32 %inc6, i32* @a, align 4
46+
br label %for.cond
47+
48+
for.end7: ; preds = %for.cond
49+
ret i32 %0
50+
}
51+
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
; RUN: opt -S -gvn-hoist < %s | FileCheck %s
2+
3+
; Check that the stores are not hoisted: it is invalid to hoist stores if they
4+
; are not executed on all paths. In this testcase, there are paths in the loop
5+
; that do not execute the stores.
6+
7+
; CHECK-LABEL: define void @music_task
8+
; CHECK: store
9+
; CHECK: store
10+
; CHECK: store
11+
12+
13+
%struct._MUSIC_OP_API_ = type { %struct._FILE_OPERATE_*, %struct.__MUSIC_API* }
14+
%struct._FILE_OPERATE_ = type { %struct._FILE_OPERATE_INIT_*, %struct._lg_dev_info_* }
15+
%struct._FILE_OPERATE_INIT_ = type { i32, i32, i32, i32, i32*, i8*, i32 }
16+
%struct._lg_dev_info_ = type { %struct.os_event, i32, i32, %struct._lg_dev_hdl_*, i8, i8, i8, i8, i8 }
17+
%struct.os_event = type { i8, i32, i8*, %union.anon }
18+
%union.anon = type { %struct.event_cnt }
19+
%struct.event_cnt = type { i16 }
20+
%struct._lg_dev_hdl_ = type { i8*, i8*, i8*, i8*, i8* }
21+
%struct.__MUSIC_API = type <{ i8*, i8*, i32, %struct._DEC_API, %struct._DEC_API_IO*, %struct._FS_BRK_POINT* }>
22+
%struct._DEC_API = type { %struct._DEC_PHY*, i8*, i8*, i8* (i8*)*, i32* (i8*)*, i8*, %struct._AAC_DEFAULT_SETTING, i32, i32, i8*, %struct.decoder_inf*, i32, i8, i8*, i8, i8* }
23+
%struct._DEC_PHY = type { i8*, %struct.__audio_decoder_ops*, i8*, %struct.if_decoder_io, %struct.if_dec_file*, i8*, i32 (i8*)*, i32, i8, %struct.__FF_FR }
24+
%struct.__audio_decoder_ops = type { i8*, i32 (i8*, %struct.if_decoder_io*, i8*)*, i32 (i8*)*, i32 (i8*, i32)*, %struct.decoder_inf* (i8*)*, i32 (i8*)*, i32 (i8*)*, i32 (...)*, i32 (...)*, i32 (...)*, void (i8*, i32)*, void (i8*, i32, i8*, i32)*, i32 (i8*, i32, i8*)* }
25+
%struct.if_decoder_io = type { i8*, i32 (i8*, i32, i8*, i32, i8)*, i32 (i8*, i32, i8*)*, void (i8*, i8*, i32)*, i32 (i8*)*, i32 (i8*, i32, i32)* }
26+
%struct.if_dec_file = type { i32 (i8*, i8*, i32)*, i32 (i8*, i32, i32)* }
27+
%struct.__FF_FR = type { i32, i32, i8, i8, i8 }
28+
%struct._AAC_DEFAULT_SETTING = type { i32, i32, i32 }
29+
%struct.decoder_inf = type { i16, i16, i32, i32 }
30+
%struct._DEC_API_IO = type { i8*, i8*, i16 (i8*, i8*, i16)*, i32 (i8*, i8, i32)*, i32 (%struct.decoder_inf*, i32)*, %struct.__OP_IO, i32, i32 }
31+
%struct.__OP_IO = type { i8*, i8* (i8*, i8*, i32)* }
32+
%struct._FS_BRK_POINT = type { %struct._FS_BRK_INFO, i32, i32 }
33+
%struct._FS_BRK_INFO = type { i32, i32, [8 x i8], i8, i8, i16 }
34+
35+
@.str = external hidden unnamed_addr constant [10 x i8], align 1
36+
37+
define void @music_task(i8* nocapture readnone %p) local_unnamed_addr {
38+
entry:
39+
%mapi = alloca %struct._MUSIC_OP_API_*, align 8
40+
%0 = bitcast %struct._MUSIC_OP_API_** %mapi to i8*
41+
call void @llvm.lifetime.start(i64 8, i8* %0)
42+
store %struct._MUSIC_OP_API_* null, %struct._MUSIC_OP_API_** %mapi, align 8, !tbaa !1
43+
%call = call i32 @music_decoder_init(%struct._MUSIC_OP_API_** nonnull %mapi)
44+
br label %while.cond
45+
46+
while.cond.loopexit: ; preds = %while.cond2
47+
br label %while.cond
48+
49+
while.cond: ; preds = %while.cond.loopexit, %entry
50+
%1 = load %struct._MUSIC_OP_API_*, %struct._MUSIC_OP_API_** %mapi, align 8, !tbaa !1
51+
%dop_api = getelementptr inbounds %struct._MUSIC_OP_API_, %struct._MUSIC_OP_API_* %1, i64 0, i32 1
52+
%2 = load %struct.__MUSIC_API*, %struct.__MUSIC_API** %dop_api, align 8, !tbaa !5
53+
%file_num = getelementptr inbounds %struct.__MUSIC_API, %struct.__MUSIC_API* %2, i64 0, i32 2
54+
%3 = bitcast i32* %file_num to i8*
55+
%call1 = call i32 @music_play_api(%struct._MUSIC_OP_API_* %1, i32 33, i32 0, i32 28, i8* %3)
56+
br label %while.cond2
57+
58+
while.cond2: ; preds = %while.cond2.backedge, %while.cond
59+
%err.0 = phi i32 [ %call1, %while.cond ], [ %err.0.be, %while.cond2.backedge ]
60+
switch i32 %err.0, label %sw.default [
61+
i32 0, label %while.cond.loopexit
62+
i32 35, label %sw.bb
63+
i32 11, label %sw.bb7
64+
i32 12, label %sw.bb13
65+
]
66+
67+
sw.bb: ; preds = %while.cond2
68+
%4 = load %struct._MUSIC_OP_API_*, %struct._MUSIC_OP_API_** %mapi, align 8, !tbaa !1
69+
%dop_api4 = getelementptr inbounds %struct._MUSIC_OP_API_, %struct._MUSIC_OP_API_* %4, i64 0, i32 1
70+
%5 = load %struct.__MUSIC_API*, %struct.__MUSIC_API** %dop_api4, align 8, !tbaa !5
71+
%file_num5 = getelementptr inbounds %struct.__MUSIC_API, %struct.__MUSIC_API* %5, i64 0, i32 2
72+
%6 = load i32, i32* %file_num5, align 1, !tbaa !7
73+
%call6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i64 0, i64 0), i32 %6)
74+
br label %while.cond2.backedge
75+
76+
sw.bb7: ; preds = %while.cond2
77+
%7 = load %struct._MUSIC_OP_API_*, %struct._MUSIC_OP_API_** %mapi, align 8, !tbaa !1
78+
%dop_api8 = getelementptr inbounds %struct._MUSIC_OP_API_, %struct._MUSIC_OP_API_* %7, i64 0, i32 1
79+
%8 = load %struct.__MUSIC_API*, %struct.__MUSIC_API** %dop_api8, align 8, !tbaa !5
80+
%file_num9 = getelementptr inbounds %struct.__MUSIC_API, %struct.__MUSIC_API* %8, i64 0, i32 2
81+
store i32 1, i32* %file_num9, align 1, !tbaa !7
82+
%9 = bitcast i32* %file_num9 to i8*
83+
%call12 = call i32 @music_play_api(%struct._MUSIC_OP_API_* %7, i32 34, i32 0, i32 24, i8* %9)
84+
br label %while.cond2.backedge
85+
86+
sw.bb13: ; preds = %while.cond2
87+
%10 = load %struct._MUSIC_OP_API_*, %struct._MUSIC_OP_API_** %mapi, align 8, !tbaa !1
88+
%dop_api14 = getelementptr inbounds %struct._MUSIC_OP_API_, %struct._MUSIC_OP_API_* %10, i64 0, i32 1
89+
%11 = load %struct.__MUSIC_API*, %struct.__MUSIC_API** %dop_api14, align 8, !tbaa !5
90+
%file_num15 = getelementptr inbounds %struct.__MUSIC_API, %struct.__MUSIC_API* %11, i64 0, i32 2
91+
store i32 1, i32* %file_num15, align 1, !tbaa !7
92+
%12 = bitcast i32* %file_num15 to i8*
93+
%call18 = call i32 @music_play_api(%struct._MUSIC_OP_API_* %10, i32 35, i32 0, i32 26, i8* %12)
94+
br label %while.cond2.backedge
95+
96+
sw.default: ; preds = %while.cond2
97+
%13 = load %struct._MUSIC_OP_API_*, %struct._MUSIC_OP_API_** %mapi, align 8, !tbaa !1
98+
%call19 = call i32 @music_play_api(%struct._MUSIC_OP_API_* %13, i32 33, i32 0, i32 22, i8* null)
99+
br label %while.cond2.backedge
100+
101+
while.cond2.backedge: ; preds = %sw.default, %sw.bb13, %sw.bb7, %sw.bb
102+
%err.0.be = phi i32 [ %call19, %sw.default ], [ %call18, %sw.bb13 ], [ %call12, %sw.bb7 ], [ 0, %sw.bb ]
103+
br label %while.cond2
104+
}
105+
106+
declare void @llvm.lifetime.start(i64, i8* nocapture)
107+
declare i32 @music_decoder_init(%struct._MUSIC_OP_API_**)
108+
declare i32 @music_play_api(%struct._MUSIC_OP_API_*, i32, i32, i32, i8*)
109+
declare i32 @printf(i8* nocapture readonly, ...)
110+
111+
!0 = !{!"clang version 4.0.0 "}
112+
!1 = !{!2, !2, i64 0}
113+
!2 = !{!"any pointer", !3, i64 0}
114+
!3 = !{!"omnipotent char", !4, i64 0}
115+
!4 = !{!"Simple C/C++ TBAA"}
116+
!5 = !{!6, !2, i64 8}
117+
!6 = !{!"_MUSIC_OP_API_", !2, i64 0, !2, i64 8}
118+
!7 = !{!8, !9, i64 16}
119+
!8 = !{!"__MUSIC_API", !2, i64 0, !2, i64 8, !9, i64 16, !10, i64 20, !2, i64 140, !2, i64 148}
120+
!9 = !{!"int", !3, i64 0}
121+
!10 = !{!"_DEC_API", !2, i64 0, !2, i64 8, !2, i64 16, !2, i64 24, !2, i64 32, !2, i64 40, !11, i64 48, !9, i64 60, !9, i64 64, !2, i64 72, !2, i64 80, !9, i64 88, !3, i64 92, !2, i64 96, !3, i64 104, !2, i64 112}
122+
!11 = !{!"_AAC_DEFAULT_SETTING", !9, i64 0, !9, i64 4, !9, i64 8}

0 commit comments

Comments
 (0)