Skip to content

Commit 6c097f7

Browse files
committed
[Test] Add more tests demonstrating oddities in behavior of LSR
These tests demonstrate that LSR does not insert the IV increment into the latch block (as it is supposed to) when it can use an existing Phi as the IV rather than creating a new LSR IV.
1 parent bca0619 commit 6c097f7

File tree

2 files changed

+226
-2
lines changed

2 files changed

+226
-2
lines changed

llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
33

44
; TODO: We can get rid of movq here by using different offset and %rax.
5-
define i32 @test(i32* %p, i64 %len, i32 %x) {
6-
; CHECK-LABEL: test:
5+
define i32 @test_01(i32* %p, i64 %len, i32 %x) {
6+
; CHECK-LABEL: test_01:
77
; CHECK: ## %bb.0: ## %entry
88
; CHECK-NEXT: movq %rsi, %rax
99
; CHECK-NEXT: .p2align 4, 0x90
@@ -42,3 +42,89 @@ exit: ; preds = %loop
4242
failure: ; preds = %backedge
4343
unreachable
4444
}
45+
46+
define i32 @test_02(i32* %p, i64 %len, i32 %x) {
47+
; CHECK-LABEL: test_02:
48+
; CHECK: ## %bb.0: ## %entry
49+
; CHECK-NEXT: .p2align 4, 0x90
50+
; CHECK-NEXT: LBB1_1: ## %loop
51+
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
52+
; CHECK-NEXT: testq %rsi, %rsi
53+
; CHECK-NEXT: je LBB1_4
54+
; CHECK-NEXT: ## %bb.2: ## %backedge
55+
; CHECK-NEXT: ## in Loop: Header=BB1_1 Depth=1
56+
; CHECK-NEXT: cmpl %edx, -4(%rdi,%rsi,4)
57+
; CHECK-NEXT: leaq -1(%rsi), %rsi
58+
; CHECK-NEXT: jne LBB1_1
59+
; CHECK-NEXT: ## %bb.3: ## %failure
60+
; CHECK-NEXT: ud2
61+
; CHECK-NEXT: LBB1_4: ## %exit
62+
; CHECK-NEXT: movl $-1, %eax
63+
; CHECK-NEXT: retq
64+
entry:
65+
%start = add i64 %len, -1
66+
br label %loop
67+
68+
loop: ; preds = %backedge, %entry
69+
%iv = phi i64 [ %iv.next, %backedge ], [ %start, %entry ]
70+
%iv.next = add nsw i64 %iv, -1
71+
%iv.offset = add i64 %iv, 1
72+
%iv.next.offset = add i64 %iv.next, 1
73+
%cond_1 = icmp eq i64 %iv.offset, 0
74+
br i1 %cond_1, label %exit, label %backedge
75+
76+
backedge: ; preds = %loop
77+
%addr = getelementptr inbounds i32, i32* %p, i64 %iv.next.offset
78+
%loaded = load atomic i32, i32* %addr unordered, align 4
79+
%cond_2 = icmp eq i32 %loaded, %x
80+
br i1 %cond_2, label %failure, label %loop
81+
82+
exit: ; preds = %loop
83+
ret i32 -1
84+
85+
failure: ; preds = %backedge
86+
unreachable
87+
}
88+
89+
define i32 @test_03(i32* %p, i64 %len, i32 %x) {
90+
; CHECK-LABEL: test_03:
91+
; CHECK: ## %bb.0: ## %entry
92+
; CHECK-NEXT: .p2align 4, 0x90
93+
; CHECK-NEXT: LBB2_1: ## %loop
94+
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
95+
; CHECK-NEXT: testq %rsi, %rsi
96+
; CHECK-NEXT: je LBB2_4
97+
; CHECK-NEXT: ## %bb.2: ## %backedge
98+
; CHECK-NEXT: ## in Loop: Header=BB2_1 Depth=1
99+
; CHECK-NEXT: cmpl %edx, -4(%rdi,%rsi,4)
100+
; CHECK-NEXT: leaq -1(%rsi), %rsi
101+
; CHECK-NEXT: jne LBB2_1
102+
; CHECK-NEXT: ## %bb.3: ## %failure
103+
; CHECK-NEXT: ud2
104+
; CHECK-NEXT: LBB2_4: ## %exit
105+
; CHECK-NEXT: movl $-1, %eax
106+
; CHECK-NEXT: retq
107+
entry:
108+
%start = add i64 %len, -100
109+
br label %loop
110+
111+
loop: ; preds = %backedge, %entry
112+
%iv = phi i64 [ %iv.next, %backedge ], [ %start, %entry ]
113+
%iv.next = add nsw i64 %iv, -1
114+
%iv.offset = add i64 %iv, 100
115+
%iv.next.offset = add i64 %iv.next, 100
116+
%cond_1 = icmp eq i64 %iv.offset, 0
117+
br i1 %cond_1, label %exit, label %backedge
118+
119+
backedge: ; preds = %loop
120+
%addr = getelementptr inbounds i32, i32* %p, i64 %iv.next.offset
121+
%loaded = load atomic i32, i32* %addr unordered, align 4
122+
%cond_2 = icmp eq i32 %loaded, %x
123+
br i1 %cond_2, label %failure, label %loop
124+
125+
exit: ; preds = %loop
126+
ret i32 -1
127+
128+
failure: ; preds = %backedge
129+
unreachable
130+
}
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -loop-reduce -S | FileCheck %s
3+
4+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
5+
target triple = "x86_64-unknown-linux-gnu"
6+
7+
; FIXME: iv.next is supposed to be inserted in the backedge.
8+
define i32 @test_01(i32* %p, i64 %len, i32 %x) {
9+
; CHECK-LABEL: @test_01(
10+
; CHECK-NEXT: entry:
11+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 -1
12+
; CHECK-NEXT: br label [[LOOP:%.*]]
13+
; CHECK: loop:
14+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
15+
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
16+
; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[IV]], 0
17+
; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
18+
; CHECK: backedge:
19+
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[SCEVGEP]], i64 [[IV]]
20+
; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[SCEVGEP1]] unordered, align 4
21+
; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
22+
; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
23+
; CHECK: exit:
24+
; CHECK-NEXT: ret i32 -1
25+
; CHECK: failure:
26+
; CHECK-NEXT: unreachable
27+
;
28+
entry:
29+
br label %loop
30+
31+
loop: ; preds = %backedge, %entry
32+
%iv = phi i64 [ %iv.next, %backedge ], [ %len, %entry ]
33+
%iv.next = add nsw i64 %iv, -1
34+
%cond_1 = icmp eq i64 %iv, 0
35+
br i1 %cond_1, label %exit, label %backedge
36+
37+
backedge: ; preds = %loop
38+
%addr = getelementptr inbounds i32, i32* %p, i64 %iv.next
39+
%loaded = load atomic i32, i32* %addr unordered, align 4
40+
%cond_2 = icmp eq i32 %loaded, %x
41+
br i1 %cond_2, label %failure, label %loop
42+
43+
exit: ; preds = %loop
44+
ret i32 -1
45+
46+
failure: ; preds = %backedge
47+
unreachable
48+
}
49+
50+
define i32 @test_02(i32* %p, i64 %len, i32 %x) {
51+
; CHECK-LABEL: @test_02(
52+
; CHECK-NEXT: entry:
53+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 -1
54+
; CHECK-NEXT: br label [[LOOP:%.*]]
55+
; CHECK: loop:
56+
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
57+
; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[LSR_IV]], 0
58+
; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
59+
; CHECK: backedge:
60+
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[SCEVGEP]], i64 [[LSR_IV]]
61+
; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[SCEVGEP1]] unordered, align 4
62+
; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
63+
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
64+
; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
65+
; CHECK: exit:
66+
; CHECK-NEXT: ret i32 -1
67+
; CHECK: failure:
68+
; CHECK-NEXT: unreachable
69+
;
70+
entry:
71+
%start = add i64 %len, -1
72+
br label %loop
73+
74+
loop: ; preds = %backedge, %entry
75+
%iv = phi i64 [ %iv.next, %backedge ], [ %start, %entry ]
76+
%iv.next = add nsw i64 %iv, -1
77+
%iv.offset = add i64 %iv, 1
78+
%iv.next.offset = add i64 %iv.next, 1
79+
%cond_1 = icmp eq i64 %iv.offset, 0
80+
br i1 %cond_1, label %exit, label %backedge
81+
82+
backedge: ; preds = %loop
83+
%addr = getelementptr inbounds i32, i32* %p, i64 %iv.next.offset
84+
%loaded = load atomic i32, i32* %addr unordered, align 4
85+
%cond_2 = icmp eq i32 %loaded, %x
86+
br i1 %cond_2, label %failure, label %loop
87+
88+
exit: ; preds = %loop
89+
ret i32 -1
90+
91+
failure: ; preds = %backedge
92+
unreachable
93+
}
94+
95+
define i32 @test_03(i32* %p, i64 %len, i32 %x) {
96+
; CHECK-LABEL: @test_03(
97+
; CHECK-NEXT: entry:
98+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 -1
99+
; CHECK-NEXT: br label [[LOOP:%.*]]
100+
; CHECK: loop:
101+
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
102+
; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[LSR_IV]], 0
103+
; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
104+
; CHECK: backedge:
105+
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[SCEVGEP]], i64 [[LSR_IV]]
106+
; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[SCEVGEP1]] unordered, align 4
107+
; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
108+
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
109+
; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
110+
; CHECK: exit:
111+
; CHECK-NEXT: ret i32 -1
112+
; CHECK: failure:
113+
; CHECK-NEXT: unreachable
114+
;
115+
entry:
116+
%start = add i64 %len, -100
117+
br label %loop
118+
119+
loop: ; preds = %backedge, %entry
120+
%iv = phi i64 [ %iv.next, %backedge ], [ %start, %entry ]
121+
%iv.next = add nsw i64 %iv, -1
122+
%iv.offset = add i64 %iv, 100
123+
%iv.next.offset = add i64 %iv.next, 100
124+
%cond_1 = icmp eq i64 %iv.offset, 0
125+
br i1 %cond_1, label %exit, label %backedge
126+
127+
backedge: ; preds = %loop
128+
%addr = getelementptr inbounds i32, i32* %p, i64 %iv.next.offset
129+
%loaded = load atomic i32, i32* %addr unordered, align 4
130+
%cond_2 = icmp eq i32 %loaded, %x
131+
br i1 %cond_2, label %failure, label %loop
132+
133+
exit: ; preds = %loop
134+
ret i32 -1
135+
136+
failure: ; preds = %backedge
137+
unreachable
138+
}

0 commit comments

Comments
 (0)