Skip to content

Commit 0ba63b2

Browse files
committed
[SCEV] Add additional test coverage for loop-guards reasoning.
Add additional tests showing missed opportunities when using loop guards for reasoning in SCEV, depending on the order in which the guards appear in the IR.
1 parent b68565b commit 0ba63b2

File tree

7 files changed

+944
-211
lines changed

7 files changed

+944
-211
lines changed

llvm/test/Analysis/LoopAccessAnalysis/no-dep-via-loop-guards.ll

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,3 +349,90 @@ loop:
349349
exit:
350350
ret void
351351
}
352+
353+
; TODO: Should be able to determine no-dep, same as @nodep_via_logical_and_2, which differs only in the operand order of the logical and.
354+
define void @nodep_via_logical_and_1(ptr %A, i32 %index, i32 %n) {
355+
; CHECK-LABEL: 'nodep_via_logical_and_1'
356+
; CHECK-NEXT: loop:
357+
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
358+
; CHECK-NEXT: Unknown data dependence.
359+
; CHECK-NEXT: Dependences:
360+
; CHECK-NEXT: Unknown:
361+
; CHECK-NEXT: %0 = load double, ptr %gep.load, align 8 ->
362+
; CHECK-NEXT: store double %0, ptr %gep.store, align 8
363+
; CHECK-EMPTY:
364+
; CHECK-NEXT: Run-time memory checks:
365+
; CHECK-NEXT: Grouped accesses:
366+
; CHECK-EMPTY:
367+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
368+
; CHECK-NEXT: SCEV assumptions:
369+
; CHECK-EMPTY:
370+
; CHECK-NEXT: Expressions re-written:
371+
;
372+
entry:
373+
%pre.0 = icmp sgt i32 %index, 0
374+
%pre.1 = icmp slt i32 %index, %n
375+
%and.pre = select i1 %pre.1, i1 %pre.0, i1 false
376+
br i1 %and.pre, label %ph, label %exit
377+
378+
ph:
379+
%idx.1 = add i32 %index, 1
380+
%start = zext i32 %idx.1 to i64
381+
br label %loop
382+
383+
loop:
384+
%iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ]
385+
%gep.load = getelementptr double, ptr %A, i64 %iv
386+
%1 = load double, ptr %gep.load, align 8
387+
%index.ext = zext i32 %index to i64
388+
%gep.store = getelementptr double, ptr %A, i64 %index.ext
389+
store double %1, ptr %gep.store, align 8
390+
%iv.next = add i64 %iv, 1
391+
%t = trunc i64 %iv to i32
392+
%ec = icmp slt i32 %t, 1
393+
br i1 %ec, label %loop, label %exit
394+
395+
exit:
396+
ret void
397+
}
398+
399+
; Same as @nodep_via_logical_and_1, but with the operands of the logical and swapped.
400+
define void @nodep_via_logical_and_2(ptr %A, i32 %index, i32 %n) {
401+
; CHECK-LABEL: 'nodep_via_logical_and_2'
402+
; CHECK-NEXT: loop:
403+
; CHECK-NEXT: Memory dependences are safe
404+
; CHECK-NEXT: Dependences:
405+
; CHECK-NEXT: Run-time memory checks:
406+
; CHECK-NEXT: Grouped accesses:
407+
; CHECK-EMPTY:
408+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
409+
; CHECK-NEXT: SCEV assumptions:
410+
; CHECK-EMPTY:
411+
; CHECK-NEXT: Expressions re-written:
412+
;
413+
entry:
414+
%pre.0 = icmp sgt i32 %index, 0
415+
%pre.1 = icmp slt i32 %index, %n
416+
%and.pre = select i1 %pre.0, i1 %pre.1, i1 false
417+
br i1 %and.pre, label %ph, label %exit
418+
419+
ph:
420+
%idx.1 = add i32 %index, 1
421+
%start = zext i32 %idx.1 to i64
422+
br label %loop
423+
424+
loop:
425+
%iv = phi i64 [ %start, %ph ], [ %iv.next, %loop ]
426+
%gep.load = getelementptr double, ptr %A, i64 %iv
427+
%1 = load double, ptr %gep.load, align 8
428+
%index.ext = zext i32 %index to i64
429+
%gep.store = getelementptr double, ptr %A, i64 %index.ext
430+
store double %1, ptr %gep.store, align 8
431+
%iv.next = add i64 %iv, 1
432+
%t = trunc i64 %iv to i32
433+
%ec = icmp slt i32 %t, 1
434+
br i1 %ec, label %loop, label %exit
435+
436+
exit:
437+
ret void
438+
}
Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2+
; RUN: opt -passes='print<scalar-evolution>' -disable-output %s 2>&1 | FileCheck %s
3+
4+
define void @test_multiple_const_guards_order1(ptr nocapture %a, i64 %i) {
5+
; CHECK-LABEL: 'test_multiple_const_guards_order1'
6+
; CHECK-NEXT: Classifying expressions for: @test_multiple_const_guards_order1
7+
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
8+
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable }
9+
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
10+
; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
11+
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
12+
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
13+
; CHECK-NEXT: Determining loop execution counts for: @test_multiple_const_guards_order1
14+
; CHECK-NEXT: Loop %loop: backedge-taken count is %i
15+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9
16+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i
17+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
18+
;
19+
entry:
20+
%c.1 = icmp ult i64 %i, 16
21+
br i1 %c.1, label %guardbb, label %exit
22+
23+
guardbb:
24+
%c.2 = icmp ult i64 %i, 10
25+
br i1 %c.2, label %loop, label %exit
26+
27+
loop:
28+
%iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
29+
%idx = getelementptr inbounds i32, ptr %a, i64 %iv
30+
store i32 1, ptr %idx, align 4
31+
%iv.next = add nuw nsw i64 %iv, 1
32+
%exitcond = icmp eq i64 %iv, %i
33+
br i1 %exitcond, label %exit, label %loop
34+
35+
exit:
36+
ret void
37+
}
38+
39+
define void @test_multiple_const_guards_order2(ptr nocapture %a, i64 %i) {
40+
; CHECK-LABEL: 'test_multiple_const_guards_order2'
41+
; CHECK-NEXT: Classifying expressions for: @test_multiple_const_guards_order2
42+
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
43+
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,10) S: [0,10) Exits: %i LoopDispositions: { %loop: Computable }
44+
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
45+
; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
46+
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
47+
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,11) S: [1,11) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
48+
; CHECK-NEXT: Determining loop execution counts for: @test_multiple_const_guards_order2
49+
; CHECK-NEXT: Loop %loop: backedge-taken count is %i
50+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9
51+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i
52+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
53+
;
54+
entry:
55+
%c.1 = icmp ult i64 %i, 10
56+
br i1 %c.1, label %guardbb, label %exit
57+
58+
guardbb:
59+
%c.2 = icmp ult i64 %i, 16
60+
br i1 %c.2, label %loop, label %exit
61+
62+
loop:
63+
%iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
64+
%idx = getelementptr inbounds i32, ptr %a, i64 %iv
65+
store i32 1, ptr %idx, align 4
66+
%iv.next = add nuw nsw i64 %iv, 1
67+
%exitcond = icmp eq i64 %iv, %i
68+
br i1 %exitcond, label %exit, label %loop
69+
70+
exit:
71+
ret void
72+
}
73+
74+
define void @test_multiple_var_guards_order1(ptr nocapture %a, i64 %i, i64 %N) {
75+
; CHECK-LABEL: 'test_multiple_var_guards_order1'
76+
; CHECK-NEXT: Classifying expressions for: @test_multiple_var_guards_order1
77+
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
78+
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,11) S: [0,11) Exits: %i LoopDispositions: { %loop: Computable }
79+
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
80+
; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
81+
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
82+
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,12) S: [1,12) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
83+
; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_order1
84+
; CHECK-NEXT: Loop %loop: backedge-taken count is %i
85+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 10
86+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i
87+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
88+
;
89+
entry:
90+
%c.1 = icmp ult i64 %N, 12
91+
br i1 %c.1, label %guardbb, label %exit
92+
93+
guardbb:
94+
%c.2 = icmp ult i64 %i, %N
95+
br i1 %c.2, label %loop, label %exit
96+
97+
loop:
98+
%iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
99+
%idx = getelementptr inbounds i32, ptr %a, i64 %iv
100+
store i32 1, ptr %idx, align 4
101+
%iv.next = add nuw nsw i64 %iv, 1
102+
%exitcond = icmp eq i64 %iv, %i
103+
br i1 %exitcond, label %exit, label %loop
104+
105+
exit:
106+
ret void
107+
}
108+
109+
define void @test_multiple_var_guards_order2(ptr nocapture %a, i64 %i, i64 %N) {
110+
; CHECK-LABEL: 'test_multiple_var_guards_order2'
111+
; CHECK-NEXT: Classifying expressions for: @test_multiple_var_guards_order2
112+
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
113+
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,11) S: [0,11) Exits: %i LoopDispositions: { %loop: Computable }
114+
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
115+
; CHECK-NEXT: --> {%a,+,4}<nuw><%loop> U: full-set S: full-set Exits: ((4 * %i) + %a) LoopDispositions: { %loop: Computable }
116+
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
117+
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,12) S: [1,12) Exits: (1 + %i) LoopDispositions: { %loop: Computable }
118+
; CHECK-NEXT: Determining loop execution counts for: @test_multiple_var_guards_order2
119+
; CHECK-NEXT: Loop %loop: backedge-taken count is %i
120+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 10
121+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %i
122+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
123+
;
124+
entry:
125+
%c.1 = icmp ult i64 %i, %N
126+
br i1 %c.1, label %guardbb, label %exit
127+
128+
guardbb:
129+
%c.2 = icmp ult i64 %N, 12
130+
br i1 %c.2, label %loop, label %exit
131+
132+
loop:
133+
%iv = phi i64 [ %iv.next, %loop ], [ 0, %guardbb ]
134+
%idx = getelementptr inbounds i32, ptr %a, i64 %iv
135+
store i32 1, ptr %idx, align 4
136+
%iv.next = add nuw nsw i64 %iv, 1
137+
%exitcond = icmp eq i64 %iv, %i
138+
br i1 %exitcond, label %exit, label %loop
139+
140+
exit:
141+
ret void
142+
}
143+
144+
define i32 @sle_sgt_ult_umax_to_smax(i32 %num) {
145+
; CHECK-LABEL: 'sle_sgt_ult_umax_to_smax'
146+
; CHECK-NEXT: Classifying expressions for: @sle_sgt_ult_umax_to_smax
147+
; CHECK-NEXT: %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
148+
; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,25) S: [0,25) Exits: (4 * ((-4 + %num) /u 4))<nuw> LoopDispositions: { %loop: Computable }
149+
; CHECK-NEXT: %iv.next = add nuw i32 %iv, 4
150+
; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,29) S: [4,29) Exits: (4 + (4 * ((-4 + %num) /u 4))<nuw>) LoopDispositions: { %loop: Computable }
151+
; CHECK-NEXT: Determining loop execution counts for: @sle_sgt_ult_umax_to_smax
152+
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + %num) /u 4)
153+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 6
154+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + %num) /u 4)
155+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
156+
;
157+
guard.1:
158+
%cmp.1 = icmp sle i32 %num, 0
159+
br i1 %cmp.1, label %exit, label %guard.2
160+
161+
guard.2:
162+
%cmp.2 = icmp sgt i32 %num, 28
163+
br i1 %cmp.2, label %exit, label %guard.3
164+
165+
guard.3:
166+
%cmp.3 = icmp ult i32 %num, 4
167+
br i1 %cmp.3, label %exit, label %loop
168+
169+
loop:
170+
%iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
171+
%iv.next = add nuw i32 %iv, 4
172+
%ec = icmp eq i32 %iv.next, %num
173+
br i1 %ec, label %exit, label %loop
174+
175+
exit:
176+
ret i32 0
177+
}
178+
179+
; Similar to @sle_sgt_ult_umax_to_smax but with different predicate order.
180+
define i32 @ult_sle_sgt_umax_to_smax(i32 %num) {
181+
; CHECK-LABEL: 'ult_sle_sgt_umax_to_smax'
182+
; CHECK-NEXT: Classifying expressions for: @ult_sle_sgt_umax_to_smax
183+
; CHECK-NEXT: %iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
184+
; CHECK-NEXT: --> {0,+,4}<nuw><%loop> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * ((-4 + %num) /u 4))<nuw> LoopDispositions: { %loop: Computable }
185+
; CHECK-NEXT: %iv.next = add nuw i32 %iv, 4
186+
; CHECK-NEXT: --> {4,+,4}<nuw><%loop> U: [4,-3) S: [-2147483648,2147483645) Exits: (4 + (4 * ((-4 + %num) /u 4))<nuw>) LoopDispositions: { %loop: Computable }
187+
; CHECK-NEXT: Determining loop execution counts for: @ult_sle_sgt_umax_to_smax
188+
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + %num) /u 4)
189+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 1073741823
190+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + %num) /u 4)
191+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
192+
;
193+
guard.1:
194+
%cmp.1 = icmp ult i32 %num, 4
195+
br i1 %cmp.1, label %exit, label %guard.2
196+
197+
guard.2:
198+
%cmp.2 = icmp sgt i32 %num, 28
199+
br i1 %cmp.2, label %exit, label %guard.3
200+
201+
guard.3:
202+
%cmp.3 = icmp sle i32 %num, 0
203+
br i1 %cmp.3, label %exit, label %loop
204+
205+
loop:
206+
%iv = phi i32 [ 0, %guard.3 ], [ %iv.next, %loop ]
207+
%iv.next = add nuw i32 %iv, 4
208+
%ec = icmp eq i32 %iv.next, %num
209+
br i1 %ec, label %exit, label %loop
210+
211+
exit:
212+
ret i32 0
213+
}
214+
215+
define void @const_max_btc_32_or_order_1(i64 %n) {
216+
; CHECK-LABEL: 'const_max_btc_32_or_order_1'
217+
; CHECK-NEXT: Classifying expressions for: @const_max_btc_32_or_order_1
218+
; CHECK-NEXT: %and.pre = and i1 %pre.1, %pre.0
219+
; CHECK-NEXT: --> (%pre.1 umin %pre.0) U: full-set S: full-set
220+
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ]
221+
; CHECK-NEXT: --> {0,+,1}<nuw><%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %n LoopDispositions: { %loop: Computable }
222+
; CHECK-NEXT: %iv.next = add i64 %iv, 1
223+
; CHECK-NEXT: --> {1,+,1}<nuw><%loop> U: [1,-9223372036854775807) S: [1,-9223372036854775807) Exits: (1 + %n) LoopDispositions: { %loop: Computable }
224+
; CHECK-NEXT: Determining loop execution counts for: @const_max_btc_32_or_order_1
225+
; CHECK-NEXT: Loop %loop: backedge-taken count is %n
226+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9223372036854775807
227+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %n
228+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
229+
;
230+
entry:
231+
%pre.0 = icmp slt i64 %n, 33
232+
%pre.1 = icmp ne i64 %n, 0
233+
%and.pre = and i1 %pre.1, %pre.0
234+
br i1 %and.pre, label %ph, label %exit
235+
236+
ph:
237+
%pre.2 = icmp sgt i64 %n, 0
238+
br i1 %pre.2, label %loop, label %exit
239+
240+
loop:
241+
%iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ]
242+
call void @foo()
243+
%iv.next = add i64 %iv, 1
244+
%ec = icmp eq i64 %iv, %n
245+
br i1 %ec, label %exit, label %loop
246+
247+
exit:
248+
ret void
249+
}
250+
251+
; Same as @const_max_btc_32_or_order_1, but with the operands of the AND swapped.
252+
define void @const_max_btc_32_or_order_2(i64 %n) {
253+
; CHECK-LABEL: 'const_max_btc_32_or_order_2'
254+
; CHECK-NEXT: Classifying expressions for: @const_max_btc_32_or_order_2
255+
; CHECK-NEXT: %and.pre = and i1 %pre.0, %pre.1
256+
; CHECK-NEXT: --> (%pre.0 umin %pre.1) U: full-set S: full-set
257+
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ]
258+
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,33) S: [0,33) Exits: %n LoopDispositions: { %loop: Computable }
259+
; CHECK-NEXT: %iv.next = add i64 %iv, 1
260+
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,34) S: [1,34) Exits: (1 + %n) LoopDispositions: { %loop: Computable }
261+
; CHECK-NEXT: Determining loop execution counts for: @const_max_btc_32_or_order_2
262+
; CHECK-NEXT: Loop %loop: backedge-taken count is %n
263+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 32
264+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %n
265+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
266+
;
267+
entry:
268+
%pre.0 = icmp slt i64 %n, 33
269+
%pre.1 = icmp ne i64 %n, 0
270+
%and.pre = and i1 %pre.0, %pre.1
271+
br i1 %and.pre, label %ph, label %exit
272+
273+
ph:
274+
%pre.2 = icmp sgt i64 %n, 0
275+
br i1 %pre.2, label %loop, label %exit
276+
277+
loop:
278+
%iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ]
279+
call void @foo()
280+
%iv.next = add i64 %iv, 1
281+
%ec = icmp eq i64 %iv, %n
282+
br i1 %ec, label %exit, label %loop
283+
284+
exit:
285+
ret void
286+
}
287+
288+
declare void @foo()

0 commit comments

Comments
 (0)