Skip to content

Commit a27fd12

Browse files
committed
Precommit test for D150447.
1 parent c4a872b commit a27fd12

File tree

1 file changed

+76
-17
lines changed

1 file changed

+76
-17
lines changed

llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll

Lines changed: 76 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
12
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
23

34
; Where the mask of lanes wanting to exit the loop on this iteration is not
45
; obviously already masked by exec (in this case, the xor with -1 inserted by
56
; control flow annotation), lower control flow must insert an S_AND_B64
67
; with exec.
78

8-
; GCN-LABEL: {{^}}needs_and:
9-
10-
; GCN: s_or_b64 exec, exec, [[REG1:[^ ,]*]]
11-
; GCN: s_andn2_b64 exec, exec, [[REG2:[^ ,]*]]
12-
; GCN: s_or_b64 [[REG2:[^ ,]*]], [[REG1:[^ ,]*]], [[REG2:[^ ,]*]]
13-
; GCN: s_or_b64 exec, exec, [[REG2:[^ ,]*]]
149
define void @needs_and(i32 %arg) {
10+
; GCN-LABEL: needs_and:
11+
; GCN: ; %bb.0: ; %entry
12+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13+
; GCN-NEXT: s_mov_b32 s8, 1
14+
; GCN-NEXT: s_mov_b64 s[6:7], 0
15+
; GCN-NEXT: s_branch .LBB0_2
16+
; GCN-NEXT: .LBB0_1: ; %endif
17+
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
18+
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
19+
; GCN-NEXT: s_add_i32 s8, s8, 1
20+
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
21+
; GCN-NEXT: s_cbranch_execz .LBB0_4
22+
; GCN-NEXT: .LBB0_2: ; %loop
23+
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
24+
; GCN-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0
25+
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s8, v0
26+
; GCN-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7]
27+
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
28+
; GCN-NEXT: s_cbranch_execz .LBB0_1
29+
; GCN-NEXT: ; %bb.3: ; %then
30+
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
31+
; GCN-NEXT: s_nop 0
32+
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
33+
; GCN-NEXT: s_branch .LBB0_1
34+
; GCN-NEXT: .LBB0_4: ; %loopexit
35+
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
36+
; GCN-NEXT: s_waitcnt vmcnt(0)
37+
; GCN-NEXT: s_setpc_b64 s[30:31]
1538
entry:
1639
br label %loop
1740

@@ -36,12 +59,24 @@ loopexit:
3659
; obviously already masked by exec (a V_CMP), lower control flow can omit
3760
; the S_AND_B64 to avoid an unnecessary instruction.
3861

39-
; GCN-LABEL: {{^}}doesnt_need_and:
40-
; GCN: v_cmp{{[^ ]*}} [[REG1:[^ ,]*]]
41-
; GCN: s_or_b64 [[REG2:[^ ,]*]], [[REG1]],
42-
; GCN: s_andn2_b64 exec, exec, [[REG2]]
43-
4462
define void @doesnt_need_and(i32 %arg) {
63+
; GCN-LABEL: doesnt_need_and:
64+
; GCN: ; %bb.0: ; %entry
65+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66+
; GCN-NEXT: s_mov_b32 s6, 0
67+
; GCN-NEXT: s_mov_b64 s[4:5], 0
68+
; GCN-NEXT: .LBB1_1: ; %loop
69+
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
70+
; GCN-NEXT: s_add_i32 s6, s6, 1
71+
; GCN-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
72+
; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
73+
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
74+
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
75+
; GCN-NEXT: s_cbranch_execnz .LBB1_1
76+
; GCN-NEXT: ; %bb.2: ; %loopexit
77+
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
78+
; GCN-NEXT: s_waitcnt vmcnt(0)
79+
; GCN-NEXT: s_setpc_b64 s[30:31]
4580
entry:
4681
br label %loop
4782

@@ -59,13 +94,37 @@ loopexit:
5994
; Another case where the mask of lanes wanting to exit the loop is not masked
6095
; by exec, because it is a function parameter.
6196

62-
; GCN-LABEL: {{^}}break_cond_is_arg:
63-
; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}}
64-
; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]]
65-
; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]]
66-
; GCN: s_or_b64 [[REG3]], [[REG2]],
67-
6897
define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
98+
; GCN-LABEL: break_cond_is_arg:
99+
; GCN: ; %bb.0: ; %entry
100+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101+
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
102+
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
103+
; GCN-NEXT: s_xor_b64 s[4:5], vcc, -1
104+
; GCN-NEXT: s_mov_b32 s10, 1
105+
; GCN-NEXT: s_mov_b64 s[6:7], 0
106+
; GCN-NEXT: s_branch .LBB2_2
107+
; GCN-NEXT: .LBB2_1: ; %endif
108+
; GCN-NEXT: ; in Loop: Header=BB2_2 Depth=1
109+
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
110+
; GCN-NEXT: s_add_i32 s10, s10, 1
111+
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
112+
; GCN-NEXT: s_cbranch_execz .LBB2_4
113+
; GCN-NEXT: .LBB2_2: ; %loop
114+
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
115+
; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
116+
; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
117+
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s10, v0
118+
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
119+
; GCN-NEXT: s_cbranch_execz .LBB2_1
120+
; GCN-NEXT: ; %bb.3: ; %then
121+
; GCN-NEXT: ; in Loop: Header=BB2_2 Depth=1
122+
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
123+
; GCN-NEXT: s_branch .LBB2_1
124+
; GCN-NEXT: .LBB2_4: ; %loopexit
125+
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
126+
; GCN-NEXT: s_waitcnt vmcnt(0)
127+
; GCN-NEXT: s_setpc_b64 s[30:31]
69128
entry:
70129
br label %loop
71130

0 commit comments

Comments
 (0)