Skip to content

Commit 534d886

Browse files
committed
[AMDGPU] add generated checks for some LIT tests
This is in preparation for further changes that affect these tests. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D75403
1 parent 6f029da commit 534d886

File tree

6 files changed

+1086
-552
lines changed

6 files changed

+1086
-552
lines changed

llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
3+
; NOTE: The checks for opt are NOT added by the update script. Those
4+
; checks are looking for the absence of specific metadata, which
5+
; cannot be expressed reliably by the generated checks.
6+
7+
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=ISA
18
; RUN: opt --amdgpu-annotate-uniform -S %s | FileCheck %s -check-prefix=UNIFORM
29
; RUN: opt --amdgpu-annotate-uniform --si-annotate-control-flow -S %s | FileCheck %s -check-prefix=CONTROLFLOW
310

@@ -9,6 +16,56 @@
916
target triple = "amdgcn-mesa-mesa3d"
1017

1118
define amdgpu_ps void @main(i32 %0, float %1) {
19+
; ISA-LABEL: main:
20+
; ISA: ; %bb.0: ; %start
21+
; ISA-NEXT: v_readfirstlane_b32 s0, v0
22+
; ISA-NEXT: s_mov_b32 m0, s0
23+
; ISA-NEXT: s_mov_b32 s0, 0
24+
; ISA-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
25+
; ISA-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
26+
; ISA-NEXT: s_mov_b64 s[2:3], 0
27+
; ISA-NEXT: ; implicit-def: $sgpr6_sgpr7
28+
; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
29+
; ISA-NEXT: s_branch BB0_3
30+
; ISA-NEXT: BB0_1: ; %Flow1
31+
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
32+
; ISA-NEXT: s_or_b64 exec, exec, s[8:9]
33+
; ISA-NEXT: s_add_i32 s0, s0, 1
34+
; ISA-NEXT: s_mov_b64 s[8:9], 0
35+
; ISA-NEXT: BB0_2: ; %Flow
36+
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
37+
; ISA-NEXT: s_and_b64 s[10:11], exec, s[6:7]
38+
; ISA-NEXT: s_or_b64 s[2:3], s[10:11], s[2:3]
39+
; ISA-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
40+
; ISA-NEXT: s_and_b64 s[8:9], s[8:9], exec
41+
; ISA-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
42+
; ISA-NEXT: s_andn2_b64 exec, exec, s[2:3]
43+
; ISA-NEXT: s_cbranch_execz BB0_6
44+
; ISA-NEXT: BB0_3: ; %loop
45+
; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
46+
; ISA-NEXT: s_or_b64 s[6:7], s[6:7], exec
47+
; ISA-NEXT: s_cmp_lt_u32 s0, 32
48+
; ISA-NEXT: s_mov_b64 s[8:9], -1
49+
; ISA-NEXT: s_cbranch_scc0 BB0_2
50+
; ISA-NEXT: ; %bb.4: ; %endif1
51+
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
52+
; ISA-NEXT: s_mov_b64 s[6:7], -1
53+
; ISA-NEXT: s_and_saveexec_b64 s[8:9], vcc
54+
; ISA-NEXT: s_cbranch_execz BB0_1
55+
; ISA-NEXT: ; %bb.5: ; %endif2
56+
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
57+
; ISA-NEXT: s_xor_b64 s[6:7], exec, -1
58+
; ISA-NEXT: s_branch BB0_1
59+
; ISA-NEXT: BB0_6: ; %Flow2
60+
; ISA-NEXT: s_or_b64 exec, exec, s[2:3]
61+
; ISA-NEXT: v_mov_b32_e32 v1, 0
62+
; ISA-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]
63+
; ISA-NEXT: ; %bb.7: ; %if1
64+
; ISA-NEXT: v_sqrt_f32_e32 v1, v0
65+
; ISA-NEXT: ; %bb.8: ; %endloop
66+
; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
67+
; ISA-NEXT: exp mrt0 v1, v1, v1, v1 done vm
68+
; ISA-NEXT: s_endpgm
1269
start:
1370
%v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
1471
br label %loop

llvm/test/CodeGen/AMDGPU/infinite-loop.ll

Lines changed: 141 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,27 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
13
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
24
; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify %s | FileCheck -check-prefix=IR %s
35

4-
; SI-LABEL: {{^}}infinite_loop:
5-
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
6-
; SI: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop
7-
; SI: s_waitcnt lgkmcnt(0)
8-
; SI: buffer_store_dword [[REG]]
9-
; SI: s_branch [[LOOP]]
106
define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) {
7+
; SI-LABEL: infinite_loop:
8+
; SI: ; %bb.0: ; %entry
9+
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
10+
; SI-NEXT: s_mov_b32 s3, 0xf000
11+
; SI-NEXT: s_mov_b32 s2, -1
12+
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
13+
; SI-NEXT: BB0_1: ; %loop
14+
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
15+
; SI-NEXT: s_waitcnt lgkmcnt(0)
16+
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
17+
; SI-NEXT: s_branch BB0_1
18+
; IR-LABEL: @infinite_loop(
19+
; IR-NEXT: entry:
20+
; IR-NEXT: br label [[LOOP:%.*]]
21+
; IR: loop:
22+
; IR-NEXT: store volatile i32 999, i32 addrspace(1)* [[OUT:%.*]], align 4
23+
; IR-NEXT: br label [[LOOP]]
24+
;
1125
entry:
1226
br label %loop
1327

@@ -16,31 +30,36 @@ loop:
1630
br label %loop
1731
}
1832

19-
20-
; IR-LABEL: @infinite_loop_ret(
21-
; IR: br i1 %cond, label %loop, label %UnifiedReturnBlock
22-
23-
; IR: loop:
24-
; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
25-
; IR: br i1 true, label %loop, label %UnifiedReturnBlock
26-
27-
; IR: UnifiedReturnBlock:
28-
; IR: ret void
29-
30-
31-
; SI-LABEL: {{^}}infinite_loop_ret:
32-
; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]]
33-
34-
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
35-
; SI: s_and_b64 vcc, exec, -1
36-
; SI: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop
37-
; SI: s_waitcnt lgkmcnt(0)
38-
; SI: buffer_store_dword [[REG]]
39-
; SI: s_cbranch_vccnz [[LOOP]]
40-
41-
; SI: [[RET]]: ; %UnifiedReturnBlock
42-
; SI: s_endpgm
4333
define amdgpu_kernel void @infinite_loop_ret(i32 addrspace(1)* %out) {
34+
; SI-LABEL: infinite_loop_ret:
35+
; SI: ; %bb.0: ; %entry
36+
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
37+
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
38+
; SI-NEXT: s_cbranch_execz BB1_3
39+
; SI-NEXT: ; %bb.1: ; %loop.preheader
40+
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
41+
; SI-NEXT: s_mov_b32 s3, 0xf000
42+
; SI-NEXT: s_mov_b32 s2, -1
43+
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
44+
; SI-NEXT: s_and_b64 vcc, exec, -1
45+
; SI-NEXT: BB1_2: ; %loop
46+
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
47+
; SI-NEXT: s_waitcnt lgkmcnt(0)
48+
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
49+
; SI-NEXT: s_cbranch_vccnz BB1_2
50+
; SI-NEXT: BB1_3: ; %UnifiedReturnBlock
51+
; SI-NEXT: s_endpgm
52+
; IR-LABEL: @infinite_loop_ret(
53+
; IR-NEXT: entry:
54+
; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
55+
; IR-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP]], 1
56+
; IR-NEXT: br i1 [[COND]], label [[LOOP:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
57+
; IR: loop:
58+
; IR-NEXT: store volatile i32 999, i32 addrspace(1)* [[OUT:%.*]], align 4
59+
; IR-NEXT: br i1 true, label [[LOOP]], label [[UNIFIEDRETURNBLOCK]]
60+
; IR: UnifiedReturnBlock:
61+
; IR-NEXT: ret void
62+
;
4463
entry:
4564
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
4665
%cond = icmp eq i32 %tmp, 1
@@ -54,44 +73,44 @@ return:
5473
ret void
5574
}
5675

57-
58-
; IR-LABEL: @infinite_loops(
59-
; IR: br i1 undef, label %loop1, label %loop2
60-
61-
; IR: loop1:
62-
; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
63-
; IR: br i1 true, label %loop1, label %DummyReturnBlock
64-
65-
; IR: loop2:
66-
; IR: store volatile i32 888, i32 addrspace(1)* %out, align 4
67-
; IR: br i1 true, label %loop2, label %DummyReturnBlock
68-
69-
; IR: DummyReturnBlock:
70-
; IR: ret void
71-
72-
73-
; SI-LABEL: {{^}}infinite_loops:
74-
75-
; SI: v_mov_b32_e32 [[REG1:v[0-9]+]], 0x3e7
76-
; SI: s_and_b64 vcc, exec, -1
77-
78-
; SI: [[LOOP1:BB[0-9]+_[0-9]+]]: ; %loop1
79-
; SI: s_waitcnt lgkmcnt(0)
80-
; SI: buffer_store_dword [[REG1]]
81-
; SI: s_cbranch_vccnz [[LOOP1]]
82-
; SI: s_branch [[RET:BB[0-9]+_[0-9]+]]
83-
84-
; SI: v_mov_b32_e32 [[REG2:v[0-9]+]], 0x378
85-
; SI: s_and_b64 vcc, exec, -1
86-
87-
; SI: [[LOOP2:BB[0-9]+_[0-9]+]]: ; %loop2
88-
; SI: s_waitcnt lgkmcnt(0)
89-
; SI: buffer_store_dword [[REG2]]
90-
; SI: s_cbranch_vccnz [[LOOP2]]
91-
92-
; SI: [[RET]]: ; %DummyReturnBlock
93-
; SI: s_endpgm
9476
define amdgpu_kernel void @infinite_loops(i32 addrspace(1)* %out) {
77+
; SI-LABEL: infinite_loops:
78+
; SI: ; %bb.0: ; %entry
79+
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
80+
; SI-NEXT: s_mov_b32 s3, 0xf000
81+
; SI-NEXT: s_mov_b32 s2, -1
82+
; SI-NEXT: s_cbranch_scc0 BB2_3
83+
; SI-NEXT: ; %bb.1: ; %loop1.preheader
84+
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
85+
; SI-NEXT: s_and_b64 vcc, exec, -1
86+
; SI-NEXT: BB2_2: ; %loop1
87+
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
88+
; SI-NEXT: s_waitcnt lgkmcnt(0)
89+
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
90+
; SI-NEXT: s_cbranch_vccnz BB2_2
91+
; SI-NEXT: s_branch BB2_5
92+
; SI-NEXT: BB2_3:
93+
; SI-NEXT: v_mov_b32_e32 v0, 0x378
94+
; SI-NEXT: s_and_b64 vcc, exec, -1
95+
; SI-NEXT: BB2_4: ; %loop2
96+
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
97+
; SI-NEXT: s_waitcnt lgkmcnt(0)
98+
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
99+
; SI-NEXT: s_cbranch_vccnz BB2_4
100+
; SI-NEXT: BB2_5: ; %DummyReturnBlock
101+
; SI-NEXT: s_endpgm
102+
; IR-LABEL: @infinite_loops(
103+
; IR-NEXT: entry:
104+
; IR-NEXT: br i1 undef, label [[LOOP1:%.*]], label [[LOOP2:%.*]]
105+
; IR: loop1:
106+
; IR-NEXT: store volatile i32 999, i32 addrspace(1)* [[OUT:%.*]], align 4
107+
; IR-NEXT: br i1 true, label [[LOOP1]], label [[DUMMYRETURNBLOCK:%.*]]
108+
; IR: loop2:
109+
; IR-NEXT: store volatile i32 888, i32 addrspace(1)* [[OUT]], align 4
110+
; IR-NEXT: br i1 true, label [[LOOP2]], label [[DUMMYRETURNBLOCK]]
111+
; IR: DummyReturnBlock:
112+
; IR-NEXT: ret void
113+
;
95114
entry:
96115
br i1 undef, label %loop1, label %loop2
97116

@@ -104,55 +123,68 @@ loop2:
104123
br label %loop2
105124
}
106125

107-
108-
109-
; IR-LABEL: @infinite_loop_nest_ret(
110-
; IR: br i1 %cond1, label %outer_loop, label %UnifiedReturnBlock
111-
112-
; IR: outer_loop:
113-
; IR: br label %inner_loop
114-
115-
; IR: inner_loop:
116-
; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
117-
; IR: %cond3 = icmp eq i32 %tmp, 3
118-
; IR: br i1 true, label %TransitionBlock, label %UnifiedReturnBlock
119-
120-
; IR: TransitionBlock:
121-
; IR: br i1 %cond3, label %inner_loop, label %outer_loop
122-
123-
; IR: UnifiedReturnBlock:
124-
; IR: ret void
125-
126-
; SI-LABEL: {{^}}infinite_loop_nest_ret:
127-
; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]]
128-
129-
; SI: s_mov_b32
130-
; SI: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %outer_loop
131-
132-
; SI: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %inner_loop
133-
; SI: s_waitcnt expcnt(0)
134-
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
135-
; SI: s_waitcnt lgkmcnt(0)
136-
; SI: buffer_store_dword [[REG]]
137-
138-
; SI: s_andn2_b64 exec
139-
; SI: s_cbranch_execnz [[INNER_LOOP]]
140-
141-
; SI: s_andn2_b64 exec
142-
; SI: s_cbranch_execnz [[OUTER_LOOP]]
143-
144-
; SI: [[RET]]: ; %UnifiedReturnBlock
145-
; SI: s_endpgm
146126
define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
127+
; SI-LABEL: infinite_loop_nest_ret:
128+
; SI: ; %bb.0: ; %entry
129+
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
130+
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
131+
; SI-NEXT: s_cbranch_execz BB3_5
132+
; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
133+
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
134+
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
135+
; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0
136+
; SI-NEXT: s_mov_b64 s[2:3], 0
137+
; SI-NEXT: s_mov_b32 s7, 0xf000
138+
; SI-NEXT: s_mov_b32 s6, -1
139+
; SI-NEXT: BB3_2: ; %outer_loop
140+
; SI-NEXT: ; =>This Loop Header: Depth=1
141+
; SI-NEXT: ; Child Loop BB3_3 Depth 2
142+
; SI-NEXT: s_and_b64 s[8:9], exec, vcc
143+
; SI-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
144+
; SI-NEXT: s_mov_b64 s[8:9], 0
145+
; SI-NEXT: BB3_3: ; %inner_loop
146+
; SI-NEXT: ; Parent Loop BB3_2 Depth=1
147+
; SI-NEXT: ; => This Inner Loop Header: Depth=2
148+
; SI-NEXT: s_and_b64 s[10:11], exec, s[0:1]
149+
; SI-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
150+
; SI-NEXT: s_waitcnt expcnt(0)
151+
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
152+
; SI-NEXT: s_waitcnt lgkmcnt(0)
153+
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
154+
; SI-NEXT: s_andn2_b64 exec, exec, s[8:9]
155+
; SI-NEXT: s_cbranch_execnz BB3_3
156+
; SI-NEXT: ; %bb.4: ; %Flow
157+
; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
158+
; SI-NEXT: s_or_b64 exec, exec, s[8:9]
159+
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
160+
; SI-NEXT: s_cbranch_execnz BB3_2
161+
; SI-NEXT: BB3_5: ; %UnifiedReturnBlock
162+
; SI-NEXT: s_endpgm
163+
; IR-LABEL: @infinite_loop_nest_ret(
164+
; IR-NEXT: entry:
165+
; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
166+
; IR-NEXT: [[COND1:%.*]] = icmp eq i32 [[TMP]], 1
167+
; IR-NEXT: br i1 [[COND1]], label [[OUTER_LOOP:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
168+
; IR: outer_loop:
169+
; IR-NEXT: br label [[INNER_LOOP:%.*]]
170+
; IR: inner_loop:
171+
; IR-NEXT: store volatile i32 999, i32 addrspace(1)* [[OUT:%.*]], align 4
172+
; IR-NEXT: [[COND3:%.*]] = icmp eq i32 [[TMP]], 3
173+
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK]]
174+
; IR: TransitionBlock:
175+
; IR-NEXT: br i1 [[COND3]], label [[INNER_LOOP]], label [[OUTER_LOOP]]
176+
; IR: UnifiedReturnBlock:
177+
; IR-NEXT: ret void
178+
;
147179
entry:
148180
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
149181
%cond1 = icmp eq i32 %tmp, 1
150182
br i1 %cond1, label %outer_loop, label %return
151183

152184
outer_loop:
153-
; %cond2 = icmp eq i32 %tmp, 2
154-
; br i1 %cond2, label %outer_loop, label %inner_loop
155-
br label %inner_loop
185+
; %cond2 = icmp eq i32 %tmp, 2
186+
; br i1 %cond2, label %outer_loop, label %inner_loop
187+
br label %inner_loop
156188

157189
inner_loop: ; preds = %LeafBlock, %LeafBlock1
158190
store volatile i32 999, i32 addrspace(1)* %out, align 4

0 commit comments

Comments
 (0)