1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
1
2
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2
3
3
4
; Where the mask of lanes wanting to exit the loop on this iteration is not
4
5
; obviously already masked by exec (in this case, the xor with -1 inserted by
5
6
; control flow annotation), then lower control flow must insert an S_AND_B64
6
7
; with exec.
7
8
8
- ; GCN-LABEL: {{^}}needs_and:
9
-
10
- ; GCN: s_or_b64 exec, exec, [[REG1:[^ ,]*]]
11
- ; GCN: s_andn2_b64 exec, exec, [[REG2:[^ ,]*]]
12
- ; GCN: s_or_b64 [[REG2:[^ ,]*]], [[REG1:[^ ,]*]], [[REG2:[^ ,]*]]
13
- ; GCN: s_or_b64 exec, exec, [[REG2:[^ ,]*]]
14
9
define void @needs_and (i32 %arg ) {
10
+ ; GCN-LABEL: needs_and:
11
+ ; GCN: ; %bb.0: ; %entry
12
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13
+ ; GCN-NEXT: s_mov_b32 s8, 1
14
+ ; GCN-NEXT: s_mov_b64 s[6:7], 0
15
+ ; GCN-NEXT: s_branch .LBB0_2
16
+ ; GCN-NEXT: .LBB0_1: ; %endif
17
+ ; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
18
+ ; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
19
+ ; GCN-NEXT: s_add_i32 s8, s8, 1
20
+ ; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
21
+ ; GCN-NEXT: s_cbranch_execz .LBB0_4
22
+ ; GCN-NEXT: .LBB0_2: ; %loop
23
+ ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
24
+ ; GCN-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v0
25
+ ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s8, v0
26
+ ; GCN-NEXT: s_or_b64 s[6:7], s[4:5], s[6:7]
27
+ ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
28
+ ; GCN-NEXT: s_cbranch_execz .LBB0_1
29
+ ; GCN-NEXT: ; %bb.3: ; %then
30
+ ; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
31
+ ; GCN-NEXT: s_nop 0
32
+ ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
33
+ ; GCN-NEXT: s_branch .LBB0_1
34
+ ; GCN-NEXT: .LBB0_4: ; %loopexit
35
+ ; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
36
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
37
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
15
38
entry:
16
39
br label %loop
17
40
@@ -36,12 +59,24 @@ loopexit:
36
59
; obviously already masked by exec (a V_CMP), then lower control flow can omit
37
60
; the S_AND_B64 to avoid an unnecessary instruction.
38
61
39
- ; GCN-LABEL: {{^}}doesnt_need_and:
40
- ; GCN: v_cmp{{[^ ]*}} [[REG1:[^ ,]*]]
41
- ; GCN: s_or_b64 [[REG2:[^ ,]*]], [[REG1]],
42
- ; GCN: s_andn2_b64 exec, exec, [[REG2]]
43
-
44
62
define void @doesnt_need_and (i32 %arg ) {
63
+ ; GCN-LABEL: doesnt_need_and:
64
+ ; GCN: ; %bb.0: ; %entry
65
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66
+ ; GCN-NEXT: s_mov_b32 s6, 0
67
+ ; GCN-NEXT: s_mov_b64 s[4:5], 0
68
+ ; GCN-NEXT: .LBB1_1: ; %loop
69
+ ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
70
+ ; GCN-NEXT: s_add_i32 s6, s6, 1
71
+ ; GCN-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
72
+ ; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
73
+ ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
74
+ ; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
75
+ ; GCN-NEXT: s_cbranch_execnz .LBB1_1
76
+ ; GCN-NEXT: ; %bb.2: ; %loopexit
77
+ ; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
78
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
79
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
45
80
entry:
46
81
br label %loop
47
82
@@ -59,13 +94,37 @@ loopexit:
59
94
; Another case where the mask of lanes wanting to exit the loop is not masked
60
95
; by exec, because it is a function parameter.
61
96
62
- ; GCN-LABEL: {{^}}break_cond_is_arg:
63
- ; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}}
64
- ; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]]
65
- ; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]]
66
- ; GCN: s_or_b64 [[REG3]], [[REG2]],
67
-
68
97
define void @break_cond_is_arg (i32 %arg , i1 %breakcond ) {
98
+ ; GCN-LABEL: break_cond_is_arg:
99
+ ; GCN: ; %bb.0: ; %entry
100
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101
+ ; GCN-NEXT: v_and_b32_e32 v1, 1, v1
102
+ ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
103
+ ; GCN-NEXT: s_xor_b64 s[4:5], vcc, -1
104
+ ; GCN-NEXT: s_mov_b32 s10, 1
105
+ ; GCN-NEXT: s_mov_b64 s[6:7], 0
106
+ ; GCN-NEXT: s_branch .LBB2_2
107
+ ; GCN-NEXT: .LBB2_1: ; %endif
108
+ ; GCN-NEXT: ; in Loop: Header=BB2_2 Depth=1
109
+ ; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
110
+ ; GCN-NEXT: s_add_i32 s10, s10, 1
111
+ ; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
112
+ ; GCN-NEXT: s_cbranch_execz .LBB2_4
113
+ ; GCN-NEXT: .LBB2_2: ; %loop
114
+ ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
115
+ ; GCN-NEXT: s_and_b64 s[8:9], exec, s[4:5]
116
+ ; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
117
+ ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s10, v0
118
+ ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
119
+ ; GCN-NEXT: s_cbranch_execz .LBB2_1
120
+ ; GCN-NEXT: ; %bb.3: ; %then
121
+ ; GCN-NEXT: ; in Loop: Header=BB2_2 Depth=1
122
+ ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
123
+ ; GCN-NEXT: s_branch .LBB2_1
124
+ ; GCN-NEXT: .LBB2_4: ; %loopexit
125
+ ; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
126
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
127
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
69
128
entry:
70
129
br label %loop
71
130
0 commit comments