1
- ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1
+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2
2
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
3
3
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-atomic-optimizer %s | FileCheck -check-prefix=IR %s
4
4
; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
@@ -9,29 +9,31 @@ declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32,
9
9
declare void @llvm.amdgcn.struct.buffer.store.format.v4i32 (<4 x i32 >, <4 x i32 >, i32 , i32 , i32 , i32 immarg)
10
10
11
11
define amdgpu_cs void @atomic_add (<4 x i32 > inreg %arg ) {
12
- ; IR-LABEL: @atomic_add(
12
+ ; IR-LABEL: define amdgpu_cs void @atomic_add
13
+ ; IR-SAME: (<4 x i32> inreg [[ARG:%.*]]) {
13
14
; IR-NEXT: .entry:
14
15
; IR-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
15
- ; IR-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32>
16
- ; IR-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
17
- ; IR-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
18
- ; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
16
+ ; IR-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
17
+ ; IR-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 32
18
+ ; IR-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
19
+ ; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP1]], i32 0)
19
20
; IR-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]])
20
21
; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]])
21
22
; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
22
23
; IR-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP5]], 0
23
24
; IR-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP11:%.*]]
24
25
; IR: 9:
25
- ; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
26
+ ; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 [[TMP7]], <4 x i32> [[ARG]], i32 0, i32 0, i32 0, i32 0)
26
27
; IR-NEXT: br label [[TMP11]]
27
28
; IR: 11:
28
29
; IR-NEXT: ret void
29
30
;
30
31
; GCN-LABEL: atomic_add:
31
32
; GCN: ; %bb.0: ; %.entry
32
33
; GCN-NEXT: s_mov_b64 s[4:5], exec
34
+ ; GCN-NEXT: s_mov_b32 s6, s5
33
35
; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s4, 0
34
- ; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s5, v0
36
+ ; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
35
37
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
36
38
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
37
39
; GCN-NEXT: s_cbranch_execz .LBB0_2
@@ -48,20 +50,21 @@ define amdgpu_cs void @atomic_add(<4 x i32> inreg %arg) {
48
50
}
49
51
50
52
define amdgpu_cs void @atomic_add_and_format (<4 x i32 > inreg %arg ) {
51
- ; IR-LABEL: @atomic_add_and_format(
53
+ ; IR-LABEL: define amdgpu_cs void @atomic_add_and_format
54
+ ; IR-SAME: (<4 x i32> inreg [[ARG:%.*]]) {
52
55
; IR-NEXT: .entry:
53
56
; IR-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
54
- ; IR-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32>
55
- ; IR-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
56
- ; IR-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
57
- ; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
57
+ ; IR-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
58
+ ; IR-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 32
59
+ ; IR-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
60
+ ; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP1]], i32 0)
58
61
; IR-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]])
59
62
; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]])
60
63
; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
61
64
; IR-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP5]], 0
62
65
; IR-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP11:%.*]]
63
66
; IR: 9:
64
- ; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
67
+ ; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 [[TMP7]], <4 x i32> [[ARG]], i32 0, i32 0, i32 0, i32 0)
65
68
; IR-NEXT: br label [[TMP11]]
66
69
; IR: 11:
67
70
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
@@ -73,8 +76,9 @@ define amdgpu_cs void @atomic_add_and_format(<4 x i32> inreg %arg) {
73
76
; GCN-LABEL: atomic_add_and_format:
74
77
; GCN: ; %bb.0: ; %.entry
75
78
; GCN-NEXT: s_mov_b64 s[6:7], exec
79
+ ; GCN-NEXT: s_mov_b32 s4, s7
76
80
; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0
77
- ; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s7, v0
81
+ ; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s4, v0
78
82
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
79
83
; GCN-NEXT: ; implicit-def: $vgpr1
80
84
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
@@ -103,29 +107,31 @@ define amdgpu_cs void @atomic_add_and_format(<4 x i32> inreg %arg) {
103
107
}
104
108
105
109
define amdgpu_cs void @atomic_sub (<4 x i32 > inreg %arg ) {
106
- ; IR-LABEL: @atomic_sub(
110
+ ; IR-LABEL: define amdgpu_cs void @atomic_sub
111
+ ; IR-SAME: (<4 x i32> inreg [[ARG:%.*]]) {
107
112
; IR-NEXT: .entry:
108
113
; IR-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
109
- ; IR-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32>
110
- ; IR-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
111
- ; IR-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
112
- ; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
114
+ ; IR-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
115
+ ; IR-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 32
116
+ ; IR-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
117
+ ; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP1]], i32 0)
113
118
; IR-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]])
114
119
; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]])
115
120
; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
116
121
; IR-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP5]], 0
117
122
; IR-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP11:%.*]]
118
123
; IR: 9:
119
- ; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
124
+ ; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 [[TMP7]], <4 x i32> [[ARG]], i32 0, i32 0, i32 0, i32 0)
120
125
; IR-NEXT: br label [[TMP11]]
121
126
; IR: 11:
122
127
; IR-NEXT: ret void
123
128
;
124
129
; GCN-LABEL: atomic_sub:
125
130
; GCN: ; %bb.0: ; %.entry
126
131
; GCN-NEXT: s_mov_b64 s[4:5], exec
132
+ ; GCN-NEXT: s_mov_b32 s6, s5
127
133
; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s4, 0
128
- ; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s5, v0
134
+ ; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
129
135
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
130
136
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
131
137
; GCN-NEXT: s_cbranch_execz .LBB2_2
@@ -142,20 +148,21 @@ define amdgpu_cs void @atomic_sub(<4 x i32> inreg %arg) {
142
148
}
143
149
144
150
define amdgpu_cs void @atomic_sub_and_format (<4 x i32 > inreg %arg ) {
145
- ; IR-LABEL: @atomic_sub_and_format(
151
+ ; IR-LABEL: define amdgpu_cs void @atomic_sub_and_format
152
+ ; IR-SAME: (<4 x i32> inreg [[ARG:%.*]]) {
146
153
; IR-NEXT: .entry:
147
154
; IR-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
148
- ; IR-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32>
149
- ; IR-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
150
- ; IR-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
151
- ; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
155
+ ; IR-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
156
+ ; IR-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 32
157
+ ; IR-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
158
+ ; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP1]], i32 0)
152
159
; IR-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]])
153
160
; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]])
154
161
; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
155
162
; IR-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP5]], 0
156
163
; IR-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP11:%.*]]
157
164
; IR: 9:
158
- ; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 [[TMP7]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
165
+ ; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 [[TMP7]], <4 x i32> [[ARG]], i32 0, i32 0, i32 0, i32 0)
159
166
; IR-NEXT: br label [[TMP11]]
160
167
; IR: 11:
161
168
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], [[TMP9]] ]
@@ -167,8 +174,9 @@ define amdgpu_cs void @atomic_sub_and_format(<4 x i32> inreg %arg) {
167
174
; GCN-LABEL: atomic_sub_and_format:
168
175
; GCN: ; %bb.0: ; %.entry
169
176
; GCN-NEXT: s_mov_b64 s[6:7], exec
177
+ ; GCN-NEXT: s_mov_b32 s4, s7
170
178
; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0
171
- ; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s7, v0
179
+ ; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s4, v0
172
180
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
173
181
; GCN-NEXT: ; implicit-def: $vgpr1
174
182
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
@@ -197,30 +205,32 @@ define amdgpu_cs void @atomic_sub_and_format(<4 x i32> inreg %arg) {
197
205
}
198
206
199
207
define amdgpu_cs void @atomic_xor (<4 x i32 > inreg %arg ) {
200
- ; IR-LABEL: @atomic_xor(
208
+ ; IR-LABEL: define amdgpu_cs void @atomic_xor
209
+ ; IR-SAME: (<4 x i32> inreg [[ARG:%.*]]) {
201
210
; IR-NEXT: .entry:
202
211
; IR-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
203
- ; IR-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32>
204
- ; IR-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
205
- ; IR-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
206
- ; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
212
+ ; IR-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
213
+ ; IR-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 32
214
+ ; IR-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
215
+ ; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP1]], i32 0)
207
216
; IR-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]])
208
217
; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]])
209
218
; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
210
219
; IR-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 1
211
220
; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP5]], 0
212
221
; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
213
222
; IR: 10:
214
- ; IR-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 [[TMP8]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
223
+ ; IR-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 [[TMP8]], <4 x i32> [[ARG]], i32 0, i32 0, i32 0, i32 0)
215
224
; IR-NEXT: br label [[TMP12]]
216
225
; IR: 12:
217
226
; IR-NEXT: ret void
218
227
;
219
228
; GCN-LABEL: atomic_xor:
220
229
; GCN: ; %bb.0: ; %.entry
221
230
; GCN-NEXT: s_mov_b64 s[4:5], exec
231
+ ; GCN-NEXT: s_mov_b32 s6, s5
222
232
; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s4, 0
223
- ; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s5, v0
233
+ ; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
224
234
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
225
235
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
226
236
; GCN-NEXT: s_cbranch_execz .LBB4_2
@@ -238,21 +248,22 @@ define amdgpu_cs void @atomic_xor(<4 x i32> inreg %arg) {
238
248
}
239
249
240
250
define amdgpu_cs void @atomic_xor_and_format (<4 x i32 > inreg %arg ) {
241
- ; IR-LABEL: @atomic_xor_and_format(
251
+ ; IR-LABEL: define amdgpu_cs void @atomic_xor_and_format
252
+ ; IR-SAME: (<4 x i32> inreg [[ARG:%.*]]) {
242
253
; IR-NEXT: .entry:
243
254
; IR-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
244
- ; IR-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32>
245
- ; IR-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
246
- ; IR-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
247
- ; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
255
+ ; IR-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
256
+ ; IR-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 32
257
+ ; IR-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
258
+ ; IR-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP1]], i32 0)
248
259
; IR-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]])
249
260
; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]])
250
261
; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
251
262
; IR-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 1
252
263
; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP5]], 0
253
264
; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
254
265
; IR: 10:
255
- ; IR-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 [[TMP8]], <4 x i32> [[ARG:%.*]], i32 0, i32 0, i32 0, i32 0)
266
+ ; IR-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 [[TMP8]], <4 x i32> [[ARG]], i32 0, i32 0, i32 0, i32 0)
256
267
; IR-NEXT: br label [[TMP12]]
257
268
; IR: 12:
258
269
; IR-NEXT: [[TMP13:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP11]], [[TMP10]] ]
@@ -265,8 +276,9 @@ define amdgpu_cs void @atomic_xor_and_format(<4 x i32> inreg %arg) {
265
276
; GCN-LABEL: atomic_xor_and_format:
266
277
; GCN: ; %bb.0: ; %.entry
267
278
; GCN-NEXT: s_mov_b64 s[6:7], exec
279
+ ; GCN-NEXT: s_mov_b32 s4, s7
268
280
; GCN-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0
269
- ; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s7, v0
281
+ ; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s4, v0
270
282
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
271
283
; GCN-NEXT: ; implicit-def: $vgpr1
272
284
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
0 commit comments