Commit 8a65ee8

[AMDGPU] don't mark control-flow intrinsics as convergent (#90026)
This is really a workaround to allow control-flow lowering in the presence of convergence control tokens. Control-flow intrinsics in LLVM IR are convergent because they indirectly represent the wave CFG, i.e., sets of threads that are "converged" or "execute in lock-step". But these intrinsics exist only during a small window in the lowering process: they are inserted after the structurizer and then translated to equivalent MIR pseudos. So rather than create convergence tokens for these builtins, we simply mark them as not convergent. The corresponding MIR pseudos are instead marked as having side effects, which is sufficient to prevent unwanted optimizations without having to mark them as convergent.
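
Concretely, the window in question looks something like this in IR: a minimal wave64 sketch (function and value names are hypothetical, not taken from this commit) of the if/end.cf idiom. llvm.amdgcn.if returns the divergent branch condition together with the saved exec mask, and llvm.amdgcn.end.cf restores that mask at the reconvergence point:

; Hypothetical sketch: a divergent "if" as it appears between the structurizer
; and instruction selection (wave64, so the mask overload is i64).
define amdgpu_ps void @sketch_if(i32 %x, i32 %n) {
entry:
  %cond = icmp slt i32 %x, %n
  ; returns { enter-the-then-block, saved exec mask }
  %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
  %enter = extractvalue { i1, i64 } %if, 0
  %mask = extractvalue { i1, i64 } %if, 1
  br i1 %enter, label %then, label %endif

then:
  br label %endif

endif:
  ; reconvergence point: restore the exec mask saved by llvm.amdgcn.if
  call void @llvm.amdgcn.end.cf.i64(i64 %mask)
  ret void
}

declare { i1, i64 } @llvm.amdgcn.if.i64(i1)
declare void @llvm.amdgcn.end.cf.i64(i64)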
1 parent d3dad7a · commit 8a65ee8

17 files changed (+244, -232 lines)

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 17 additions & 5 deletions
@@ -3143,25 +3143,37 @@ def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">,
 // Special Intrinsics for backend internal use only. No frontend
 // should emit calls to these.
 // ===----------------------------------------------------------------------===//
+//
+// Control-flow intrinsics in LLVM IR are convergent because they represent the
+// wave CFG, i.e., sets of threads that are "converged" or "execute in
+// lock-step". But they exist during a small window in the lowering process,
+// inserted after the structurizer and then translated to equivalent MIR
+// pseudos. So rather than create convergence tokens for these builtins, we
+// simply mark them as not convergent.
+//
+// This is really a workaround to allow control flow lowering in the presence of
+// convergence control tokens. The corresponding MIR pseudos are marked as
+// having side effects, which is sufficient to prevent optimizations without
+// having to mark them as convergent.
 def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
-  [llvm_i1_ty], [IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]
+  [llvm_i1_ty], [IntrWillReturn, IntrNoCallback, IntrNoFree]
 >;

 def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
-  [llvm_anyint_ty], [IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]
+  [llvm_anyint_ty], [IntrWillReturn, IntrNoCallback, IntrNoFree]
 >;

 def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty],
   [llvm_i1_ty, LLVMMatchType<0>],
-  [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]
+  [IntrNoMem, IntrWillReturn, IntrNoCallback, IntrNoFree]
 >;

 def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
-  [llvm_anyint_ty], [IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]
+  [llvm_anyint_ty], [IntrWillReturn, IntrNoCallback, IntrNoFree]
 >;

 def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty],
-  [IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+  [IntrWillReturn, IntrNoCallback, IntrNoFree]>;

 // Represent unreachable in a divergent region.
 def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent, IntrNoCallback, IntrNoFree]>;
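
For reference, once the llvm_anyint_ty mask type is fixed (i64 under wave64), the five definitions above surface in IR under mangled names. A hedged sketch of the corresponding declarations, with suffixes following the usual overloaded-intrinsic naming scheme:

declare { i1, i64 } @llvm.amdgcn.if.i64(i1)
declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64)
declare i64 @llvm.amdgcn.if.break.i64(i1, i64)
declare i1 @llvm.amdgcn.loop.i64(i64)
declare void @llvm.amdgcn.end.cf.i64(i64)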

llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/hidden-diverge-gmir.mir

Lines changed: 8 additions & 8 deletions
@@ -4,16 +4,16 @@
 # CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
 # CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt)
 # CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
 # CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1
 # CHECK: DIVERGENT: G_BR %bb.2
 # CHECK-LABEL: BLOCK bb.1
 # CHECK-LABEL: BLOCK bb.2
 # CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.1, %{{[0-9]*}}:_(s32), %bb.0
 # CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_PHI %{{[0-9]*}}:_(s1), %bb.1, %{{[0-9]*}}:_(s1), %bb.0
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
 # CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.3
 # CHECK: DIVERGENT: G_BR %bb.4
 # CHECK-LABEL: BLOCK bb.3

@@ -44,7 +44,7 @@ body: |
     %14:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
     %16:_(s1) = G_ICMP intpred(slt), %14(s32), %15
     %18:_(s1) = G_XOR %16, %17
-    %19:_(s1), %20:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %16(s1)
+    %19:_(s1), %20:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %16(s1)
     G_BRCOND %19(s1), %bb.2
     G_BR %bb.3

@@ -60,8 +60,8 @@ body: |

     %25:_(s32) = G_PHI %22(s32), %bb.2, %33(s32), %bb.1
     %26:_(s1) = G_PHI %24(s1), %bb.2, %18(s1), %bb.1
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(s64)
-    %27:_(s1), %28:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %26(s1)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(s64)
+    %27:_(s1), %28:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %26(s1)
     G_BRCOND %27(s1), %bb.4
     G_BR %bb.5

@@ -72,7 +72,7 @@ body: |

   bb.5:
     %31:_(s32) = G_PHI %25(s32), %bb.3, %29(s32), %bb.4
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(s64)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(s64)
     G_STORE %31(s32), %32(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
     S_ENDPGM 0

llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/temporal-divergence.mir

Lines changed: 14 additions & 14 deletions
@@ -27,15 +27,15 @@ body: |

     %11:_(s64) = G_PHI %12(s64), %bb.2, %15(s64), %bb.1
     %18:_(s1) = G_CONSTANT i1 false
-    %12:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %18(s1), %11(s64)
+    %12:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %18(s1), %11(s64)
     ; CHECK: DIVERGENT: SI_LOOP
     SI_LOOP %12(s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
     G_BR %bb.3

   bb.3:
     ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
     %14:_(s64) = G_PHI %12(s64), %bb.2
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s64)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s64)
     S_ENDPGM 0

 ...

@@ -82,15 +82,15 @@ body: |
     successors: %bb.5, %bb.4

     %15:_(s64) = G_PHI %24(s64), %bb.2, %16(s64), %bb.4
-    %16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
+    %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
     ; CHECK: DIVERGENT: SI_LOOP
     SI_LOOP %16(s64), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
     G_BR %bb.5

   bb.5:
     ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
     %18:_(s64) = G_PHI %16(s64), %bb.4
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
     G_BR %bb.3

   bb.6:

@@ -140,15 +140,15 @@ body: |
     successors: %bb.5, %bb.4

     %15:_(s64) = G_PHI %24(s64), %bb.2, %16(s64), %bb.4
-    %16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
+    %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
     ; CHECK: DIVERGENT: SI_LOOP
     SI_LOOP %16(s64), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
     G_BR %bb.5

   bb.5:
     ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
     %18:_(s64) = G_PHI %16(s64), %bb.4
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
     G_BR %bb.3

   bb.6:

@@ -191,7 +191,7 @@ body: |

     %15:_(s64) = G_PHI %25(s64), %bb.2, %16(s64), %bb.3
     %24:_(s1) = G_CONSTANT i1 false
-    %16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
+    %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
     ; CHECK: DIVERGENT: SI_LOOP
     SI_LOOP %16(s64), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
     G_BR %bb.4

@@ -201,7 +201,7 @@ body: |
     successors: %bb.5, %bb.2

     %18:_(s64) = G_PHI %16(s64), %bb.3
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
     G_BRCOND %13(s1), %bb.2
     G_BR %bb.5

@@ -241,7 +241,7 @@ body: |
   bb.2:
     %15:_(s64) = G_PHI %16(s64), %bb.4, %19(s64), %bb.1
     %24:_(s1) = G_CONSTANT i1 true
-    %16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
+    %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)

   bb.3:
     successors: %bb.4, %bb.3

@@ -259,7 +259,7 @@ body: |
   bb.5:
     ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
     %18:_(s64) = G_PHI %16(s64), %bb.4
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
     S_ENDPGM 0

 ...

@@ -291,7 +291,7 @@ body: |

     %10:_(s64) = G_PHI %11(s64), %bb.2, %19(s64), %bb.1
     %24:_(s1) = G_CONSTANT i1 false
-    %11:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %24(s1), %10(s64)
+    %11:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %10(s64)
     ; CHECK: DIVERGENT: SI_LOOP
     SI_LOOP %11(s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
     G_BR %bb.3

@@ -300,7 +300,7 @@ body: |
     ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
     ; CHECK-NOT: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
     %13:_(s64) = G_PHI %11(s64), %bb.2
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %13(s64)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %13(s64)
     %14:_(p4) = COPY %3(p4)
     %15:_(s64) = G_CONSTANT i64 40
     %16:_(p4) = G_PTR_ADD %14, %15(s64)

@@ -354,15 +354,15 @@ body: |

     %15:_(s64) = G_PHI %23(s64), %bb.2, %16(s64), %bb.3
     %25:_(s1) = G_CONSTANT i1 false
-    %16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %25(s1), %15(s64)
+    %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %25(s1), %15(s64)
     ; CHECK: DIVERGENT: SI_LOOP
     SI_LOOP %16(s64), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
     G_BR %bb.4

   bb.4:
     ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
     %18:_(s64) = G_PHI %16(s64), %bb.3
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)

   bb.5:
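
These tests all exercise the divergent-loop idiom: llvm.amdgcn.if.break ORs each iteration's exit condition into an accumulated mask (the G_PHI chains above), and SI_LOOP, lowered from llvm.amdgcn.loop, branches back until every lane has requested exit. A minimal wave64 IR sketch of that shape, with hypothetical function and value names:

define amdgpu_ps void @sketch_loop(i32 %n) {
entry:
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %phi.mask = phi i64 [ 0, %entry ], [ %mask, %loop ]
  %i.next = add i32 %i, 1
  %done = icmp uge i32 %i.next, %n
  ; accumulate the lanes that want to leave the loop
  %mask = call i64 @llvm.amdgcn.if.break.i64(i1 %done, i64 %phi.mask)
  ; branch back until all active lanes have requested exit
  %exit = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
  br i1 %exit, label %end, label %loop

end:
  call void @llvm.amdgcn.end.cf.i64(i64 %mask)
  ret void
}

declare i64 @llvm.amdgcn.if.break.i64(i1, i64)
declare i1 @llvm.amdgcn.loop.i64(i64)
declare void @llvm.amdgcn.end.cf.i64(i64)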

llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/uses-value-from-cycle.mir

Lines changed: 4 additions & 4 deletions
@@ -43,18 +43,18 @@ body: |
     ; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
     ; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
     ; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
-    ; CHECK-NOT: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break)
+    ; CHECK-NOT: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break)
     %19:_(s32) = G_PHI %18(s32), %bb.7, %25(s32), %bb.4
     %20:_(s32) = G_PHI %6(s32), %bb.7, %25(s32), %bb.4
     %21:_(s1) = G_PHI %34(s1), %bb.7, %33(s1), %bb.4
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(s32)
-    %22:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %21(s1), %0(s32)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(s32)
+    %22:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %21(s1), %0(s32)
     SI_LOOP %22(s32), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
     G_BR %bb.6

   bb.6:
     %24:_(s32) = G_PHI %22(s32), %bb.5
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %24(s32)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %24(s32)
     SI_RETURN

   bb.7:
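
Note that this test uses s32 masks, i.e. a wave32 configuration, so the llvm_anyint_ty overload resolves to i32 rather than i64; hypothetical wave32 declarations for comparison:

declare i32 @llvm.amdgcn.if.break.i32(i1, i32)
declare void @llvm.amdgcn.end.cf.i32(i32)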

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir

Lines changed: 12 additions & 12 deletions
@@ -218,17 +218,17 @@ body: |
     ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
     ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
-    ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
+    ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
     ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
     ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
     ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
-    ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+    ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
     ; GFX10-NEXT: G_BR %bb.2
     ; GFX10-NEXT: {{ $}}
     ; GFX10-NEXT: bb.2:
-    ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1
+    ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1
     ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
-    ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
+    ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
     ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
     ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
     ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]

@@ -257,14 +257,14 @@ body: |
     %14:_(s1) = G_FCMP floatpred(ogt), %13(s32), %0
     %15:_(s32) = G_CONSTANT i32 1
     %9:_(s32) = G_ADD %8, %15
-    %7:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %14(s1), %6(s32)
+    %7:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %6(s32)
     SI_LOOP %7(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
     G_BR %bb.2

   bb.2:
     %16:_(s1) = G_PHI %11(s1), %bb.1
     %17:_(s32) = G_PHI %7(s32), %bb.1
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %17(s32)
     %18:_(s32) = G_FCONSTANT float 0.000000e+00
     %19:_(s32) = G_FCONSTANT float 1.000000e+00
     %20:_(s32) = G_SELECT %16(s1), %19, %18

@@ -348,18 +348,18 @@ body: |
     ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
     ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C9]]
-    ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
+    ; GFX10-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32)
     ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1)
     ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc
     ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
     ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
-    ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+    ; GFX10-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
     ; GFX10-NEXT: G_BR %bb.6
     ; GFX10-NEXT: {{ $}}
     ; GFX10-NEXT: bb.6:
-    ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5
+    ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.5
     ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
-    ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
+    ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
     ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
     ; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
     ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY13]](s1), [[C11]], [[C10]]

@@ -429,14 +429,14 @@ body: |
     %31:_(s1) = G_FCMP floatpred(ogt), %30(s32), %0
     %32:_(s32) = G_CONSTANT i32 1
     %17:_(s32) = G_ADD %16, %32
-    %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %31(s1), %14(s32)
+    %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %31(s1), %14(s32)
     SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
     G_BR %bb.6

   bb.6:
     %33:_(s1) = G_PHI %19(s1), %bb.5
     %34:_(s32) = G_PHI %15(s32), %bb.5
-    G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32)
+    G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %34(s32)
     %35:_(s32) = G_FCONSTANT float 0.000000e+00
     %36:_(s32) = G_FCONSTANT float 1.000000e+00
     %37:_(s32) = G_SELECT %33(s1), %36, %35
