Skip to content

Commit 30a3adf

Browse files
committed
[AMDGPU] Reorder atomic optimizer to avoid CAS loop.
The AtomicExpand pass emits a CAS loop for FP atomic operations, which limits the optimizations the atomic optimizer can perform. Moving the atomic optimizer before AtomicExpand allows better codegen. Reviewed By: arsenm, #amdgpu Differential Revision: https://reviews.llvm.org/D157265 Change-Id: I68744786339644060bca4199c041f2020d9b9425
1 parent 0b01944 commit 30a3adf

11 files changed

+7383
-3057
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,13 @@ void AMDGPUPassConfig::addIRPasses() {
10271027
if (TM.getOptLevel() > CodeGenOpt::None)
10281028
addPass(createInferAddressSpacesPass());
10291029

1030+
// Run atomic optimizer before Atomic Expand
1031+
if ((TM.getTargetTriple().getArch() == Triple::amdgcn) &&
1032+
(TM.getOptLevel() >= CodeGenOpt::Less) &&
1033+
(AMDGPUAtomicOptimizerStrategy != ScanOptions::None)) {
1034+
addPass(createAMDGPUAtomicOptimizerPass(AMDGPUAtomicOptimizerStrategy));
1035+
}
1036+
10301037
addPass(createAtomicExpandPass());
10311038

10321039
if (TM.getOptLevel() > CodeGenOpt::None) {
@@ -1153,11 +1160,6 @@ bool GCNPassConfig::addPreISel() {
11531160
if (TM->getOptLevel() > CodeGenOpt::None)
11541161
addPass(createAMDGPULateCodeGenPreparePass());
11551162

1156-
if ((TM->getOptLevel() >= CodeGenOpt::Less) &&
1157-
(AMDGPUAtomicOptimizerStrategy != ScanOptions::None)) {
1158-
addPass(createAMDGPUAtomicOptimizerPass(AMDGPUAtomicOptimizerStrategy));
1159-
}
1160-
11611163
if (TM->getOptLevel() > CodeGenOpt::None)
11621164
addPass(createSinkingPass());
11631165

llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -141,19 +141,19 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1)
141141
define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) #0 {
142142
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw
143143
; GFX90A_GFX940: bb.1 (%ir-block.0):
144-
; GFX90A_GFX940-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
144+
; GFX90A_GFX940-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
145145
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0
146146
; GFX90A_GFX940-NEXT: {{ $}}
147147
; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr0
148148
; GFX90A_GFX940-NEXT: [[PRED_COPY1:%[0-9]+]]:sreg_32 = PRED_COPY $sgpr1
149149
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[PRED_COPY]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1
150150
; GFX90A_GFX940-NEXT: [[PRED_COPY2:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0
151151
; GFX90A_GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64_xexec = SI_PS_LIVE
152-
; GFX90A_GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
152+
; GFX90A_GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64_xexec = SI_IF [[SI_PS_LIVE]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
153153
; GFX90A_GFX940-NEXT: S_BRANCH %bb.2
154154
; GFX90A_GFX940-NEXT: {{ $}}
155155
; GFX90A_GFX940-NEXT: bb.2 (%ir-block.5):
156-
; GFX90A_GFX940-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
156+
; GFX90A_GFX940-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
157157
; GFX90A_GFX940-NEXT: {{ $}}
158158
; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY $exec
159159
; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY3]].sub0
@@ -196,29 +196,22 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
196196
; GFX90A_GFX940-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[PRED_COPY18]], implicit $exec
197197
; GFX90A_GFX940-NEXT: [[PRED_COPY19:%[0-9]+]]:vgpr_32 = PRED_COPY [[S_MOV_B32_1]]
198198
; GFX90A_GFX940-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_HI_U32_B32_e64_]], [[PRED_COPY19]], implicit $exec
199-
; GFX90A_GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
199+
; GFX90A_GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
200200
; GFX90A_GFX940-NEXT: S_BRANCH %bb.3
201201
; GFX90A_GFX940-NEXT: {{ $}}
202-
; GFX90A_GFX940-NEXT: bb.3 (%ir-block.36):
203-
; GFX90A_GFX940-NEXT: successors: %bb.5(0x80000000)
202+
; GFX90A_GFX940-NEXT: bb.3 (%ir-block.35):
203+
; GFX90A_GFX940-NEXT: successors: %bb.4(0x80000000)
204204
; GFX90A_GFX940-NEXT: {{ $}}
205205
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
206206
; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[STRICT_WWM]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
207-
; GFX90A_GFX940-NEXT: S_BRANCH %bb.5
208207
; GFX90A_GFX940-NEXT: {{ $}}
209208
; GFX90A_GFX940-NEXT: bb.4.Flow:
210-
; GFX90A_GFX940-NEXT: successors: %bb.6(0x80000000)
211-
; GFX90A_GFX940-NEXT: {{ $}}
212-
; GFX90A_GFX940-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
213-
; GFX90A_GFX940-NEXT: S_BRANCH %bb.6
214-
; GFX90A_GFX940-NEXT: {{ $}}
215-
; GFX90A_GFX940-NEXT: bb.5 (%ir-block.38):
216-
; GFX90A_GFX940-NEXT: successors: %bb.4(0x80000000)
209+
; GFX90A_GFX940-NEXT: successors: %bb.5(0x80000000)
217210
; GFX90A_GFX940-NEXT: {{ $}}
218211
; GFX90A_GFX940-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
219-
; GFX90A_GFX940-NEXT: S_BRANCH %bb.4
220212
; GFX90A_GFX940-NEXT: {{ $}}
221-
; GFX90A_GFX940-NEXT: bb.6 (%ir-block.39):
213+
; GFX90A_GFX940-NEXT: bb.5 (%ir-block.37):
214+
; GFX90A_GFX940-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
222215
; GFX90A_GFX940-NEXT: S_ENDPGM 0
223216
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic
224217
ret void

llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
206206
; GFX11-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
207207
; GFX11-NEXT: S_BRANCH %bb.3
208208
; GFX11-NEXT: {{ $}}
209-
; GFX11-NEXT: bb.3 (%ir-block.39):
209+
; GFX11-NEXT: bb.3 (%ir-block.36):
210210
; GFX11-NEXT: successors: %bb.5(0x80000000)
211211
; GFX11-NEXT: {{ $}}
212212
; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -220,7 +220,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
220220
; GFX11-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
221221
; GFX11-NEXT: S_BRANCH %bb.6
222222
; GFX11-NEXT: {{ $}}
223-
; GFX11-NEXT: bb.5 (%ir-block.42):
223+
; GFX11-NEXT: bb.5 (%ir-block.39):
224224
; GFX11-NEXT: successors: %bb.4(0x80000000)
225225
; GFX11-NEXT: {{ $}}
226226
; GFX11-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.3, [[DEF]], %bb.2
@@ -231,7 +231,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
231231
; GFX11-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PRED_COPY15]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec
232232
; GFX11-NEXT: S_BRANCH %bb.4
233233
; GFX11-NEXT: {{ $}}
234-
; GFX11-NEXT: bb.6 (%ir-block.50):
234+
; GFX11-NEXT: bb.6 (%ir-block.47):
235235
; GFX11-NEXT: $vgpr0 = PRED_COPY [[PHI]]
236236
; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
237237
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic

llvm/test/CodeGen/AMDGPU/atomic-optimizer-strict-wqm.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,15 @@ define amdgpu_ps void @main(i32 %arg) {
1414
; GFX10-NEXT: s_mov_b32 s4, 0
1515
; GFX10-NEXT: s_mov_b32 s2, 0
1616
; GFX10-NEXT: s_branch .LBB0_2
17-
; GFX10-NEXT: .LBB0_1: ; %Flow
18-
; GFX10-NEXT: ; in Loop: Header=BB0_2 Depth=1
17+
; GFX10-NEXT: .LBB0_1: ; in Loop: Header=BB0_2 Depth=1
1918
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
2019
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s3
21-
; GFX10-NEXT: s_and_b32 s0, exec_lo, vcc_lo
22-
; GFX10-NEXT: s_or_b32 s2, s0, s2
2320
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
2421
; GFX10-NEXT: s_cbranch_execz .LBB0_5
2522
; GFX10-NEXT: .LBB0_2: ; %bb4
2623
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
24+
; GFX10-NEXT: s_and_b32 s0, exec_lo, vcc_lo
25+
; GFX10-NEXT: s_or_b32 s2, s0, s2
2726
; GFX10-NEXT: s_and_saveexec_b32 s3, s1
2827
; GFX10-NEXT: s_cbranch_execz .LBB0_1
2928
; GFX10-NEXT: ; %bb.3: ; in Loop: Header=BB0_2 Depth=1

llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1)
147147
define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) #0 {
148148
; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw
149149
; GFX90A_GFX940: bb.0 (%ir-block.0):
150-
; GFX90A_GFX940-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
150+
; GFX90A_GFX940-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000)
151151
; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0
152152
; GFX90A_GFX940-NEXT: {{ $}}
153153
; GFX90A_GFX940-NEXT: [[PRED_COPY:%[0-9]+]]:vgpr_32 = PRED_COPY $vgpr0
@@ -156,11 +156,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
156156
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[PRED_COPY2]], %subreg.sub0, [[PRED_COPY1]], %subreg.sub1
157157
; GFX90A_GFX940-NEXT: [[PRED_COPY3:%[0-9]+]]:sreg_64 = PRED_COPY [[REG_SEQUENCE]]
158158
; GFX90A_GFX940-NEXT: [[SI_PS_LIVE:%[0-9]+]]:sreg_64 = SI_PS_LIVE
159-
; GFX90A_GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
159+
; GFX90A_GFX940-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[SI_PS_LIVE]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
160160
; GFX90A_GFX940-NEXT: S_BRANCH %bb.1
161161
; GFX90A_GFX940-NEXT: {{ $}}
162162
; GFX90A_GFX940-NEXT: bb.1 (%ir-block.5):
163-
; GFX90A_GFX940-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
163+
; GFX90A_GFX940-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
164164
; GFX90A_GFX940-NEXT: {{ $}}
165165
; GFX90A_GFX940-NEXT: [[PRED_COPY4:%[0-9]+]]:sreg_64 = PRED_COPY $exec
166166
; GFX90A_GFX940-NEXT: [[PRED_COPY5:%[0-9]+]]:sreg_32 = PRED_COPY [[PRED_COPY4]].sub1
@@ -188,30 +188,23 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
188188
; GFX90A_GFX940-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 killed [[V_ADD_F32_e64_5]], killed [[S_MOV_B32_2]]
189189
; GFX90A_GFX940-NEXT: early-clobber %1:sgpr_32 = STRICT_WWM killed [[V_READLANE_B32_]], implicit $exec
190190
; GFX90A_GFX940-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MBCNT_HI_U32_B32_e64_]], [[S_MOV_B32_]], implicit $exec
191-
; GFX90A_GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
191+
; GFX90A_GFX940-NEXT: [[SI_IF1:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
192192
; GFX90A_GFX940-NEXT: S_BRANCH %bb.2
193193
; GFX90A_GFX940-NEXT: {{ $}}
194-
; GFX90A_GFX940-NEXT: bb.2 (%ir-block.36):
195-
; GFX90A_GFX940-NEXT: successors: %bb.4(0x80000000)
194+
; GFX90A_GFX940-NEXT: bb.2 (%ir-block.35):
195+
; GFX90A_GFX940-NEXT: successors: %bb.3(0x80000000)
196196
; GFX90A_GFX940-NEXT: {{ $}}
197197
; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
198198
; GFX90A_GFX940-NEXT: [[PRED_COPY8:%[0-9]+]]:vgpr_32 = PRED_COPY %1
199199
; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR killed [[V_MOV_B32_e32_1]], [[PRED_COPY8]], [[PRED_COPY3]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1)
200-
; GFX90A_GFX940-NEXT: S_BRANCH %bb.4
201200
; GFX90A_GFX940-NEXT: {{ $}}
202201
; GFX90A_GFX940-NEXT: bb.3.Flow:
203-
; GFX90A_GFX940-NEXT: successors: %bb.5(0x80000000)
204-
; GFX90A_GFX940-NEXT: {{ $}}
205-
; GFX90A_GFX940-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
206-
; GFX90A_GFX940-NEXT: S_BRANCH %bb.5
207-
; GFX90A_GFX940-NEXT: {{ $}}
208-
; GFX90A_GFX940-NEXT: bb.4 (%ir-block.38):
209-
; GFX90A_GFX940-NEXT: successors: %bb.3(0x80000000)
202+
; GFX90A_GFX940-NEXT: successors: %bb.4(0x80000000)
210203
; GFX90A_GFX940-NEXT: {{ $}}
211204
; GFX90A_GFX940-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
212-
; GFX90A_GFX940-NEXT: S_BRANCH %bb.3
213205
; GFX90A_GFX940-NEXT: {{ $}}
214-
; GFX90A_GFX940-NEXT: bb.5 (%ir-block.39):
206+
; GFX90A_GFX940-NEXT: bb.4 (%ir-block.37):
207+
; GFX90A_GFX940-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
215208
; GFX90A_GFX940-NEXT: S_ENDPGM 0
216209
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic
217210
ret void

llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
203203
; GFX11-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
204204
; GFX11-NEXT: S_BRANCH %bb.2
205205
; GFX11-NEXT: {{ $}}
206-
; GFX11-NEXT: bb.2 (%ir-block.39):
206+
; GFX11-NEXT: bb.2 (%ir-block.36):
207207
; GFX11-NEXT: successors: %bb.4(0x80000000)
208208
; GFX11-NEXT: {{ $}}
209209
; GFX11-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@@ -218,7 +218,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
218218
; GFX11-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
219219
; GFX11-NEXT: S_BRANCH %bb.5
220220
; GFX11-NEXT: {{ $}}
221-
; GFX11-NEXT: bb.4 (%ir-block.42):
221+
; GFX11-NEXT: bb.4 (%ir-block.39):
222222
; GFX11-NEXT: successors: %bb.3(0x80000000)
223223
; GFX11-NEXT: {{ $}}
224224
; GFX11-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[DEF1]], %bb.1, [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]], %bb.2
@@ -228,7 +228,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
228228
; GFX11-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_READFIRSTLANE_B32_]], 0, killed %44, 0, 0, implicit $mode, implicit $exec
229229
; GFX11-NEXT: S_BRANCH %bb.3
230230
; GFX11-NEXT: {{ $}}
231-
; GFX11-NEXT: bb.5 (%ir-block.50):
231+
; GFX11-NEXT: bb.5 (%ir-block.47):
232232
; GFX11-NEXT: $vgpr0 = PRED_COPY [[PHI]]
233233
; GFX11-NEXT: SI_RETURN_TO_EPILOG $vgpr0
234234
%ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic

0 commit comments

Comments
 (0)