Skip to content

Commit d02e468

Browse files
committed
[AMDGPU][SIPreEmitPeephole] mustRetainExeczBranch: use BranchProbability and TargetSchedModel
Remove s_cbranch_execz branches if the transformation is profitable according to BranchProbability and TargetSchedModel.
1 parent 808c498 commit d02e468

26 files changed

+349
-482
lines changed

llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

Lines changed: 98 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#include "GCNSubtarget.h"
1616
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1717
#include "llvm/CodeGen/MachineFunctionPass.h"
18+
#include "llvm/CodeGen/TargetSchedule.h"
19+
#include "llvm/Support/BranchProbability.h"
1820

1921
using namespace llvm;
2022

@@ -41,7 +43,8 @@ class SIPreEmitPeephole : public MachineFunctionPass {
4143
MachineBasicBlock *&TrueMBB,
4244
MachineBasicBlock *&FalseMBB,
4345
SmallVectorImpl<MachineOperand> &Cond);
44-
bool mustRetainExeczBranch(const MachineBasicBlock &From,
46+
bool mustRetainExeczBranch(const MachineBasicBlock &Head,
47+
const MachineBasicBlock &From,
4548
const MachineBasicBlock &To) const;
4649
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
4750

@@ -304,11 +307,95 @@ bool SIPreEmitPeephole::getBlockDestinations(
304307
return true;
305308
}
306309

307-
bool SIPreEmitPeephole::mustRetainExeczBranch(
308-
const MachineBasicBlock &From, const MachineBasicBlock &To) const {
310+
namespace {
311+
class CostModelBase {
312+
public:
313+
virtual bool isProfitable(const MachineInstr &MI) = 0;
314+
virtual ~CostModelBase() = default;
315+
static std::unique_ptr<CostModelBase> Create(const MachineBasicBlock &MBB,
316+
const MachineBasicBlock &,
317+
const SIInstrInfo &TII);
318+
};
319+
320+
class TrivialCostModel : public CostModelBase {
321+
friend CostModelBase;
322+
309323
unsigned NumInstr = 0;
310-
const MachineFunction *MF = From.getParent();
324+
const SIInstrInfo &TII;
325+
326+
TrivialCostModel(const SIInstrInfo &TII) : TII(TII) {}
327+
328+
public:
329+
bool isProfitable(const MachineInstr &MI) override {
330+
++NumInstr;
331+
if (NumInstr >= SkipThreshold)
332+
return false;
333+
// These instructions are potentially expensive even if EXEC = 0.
334+
if (TII.isSMRD(MI) || TII.isVMEM(MI) || TII.isFLAT(MI) || TII.isDS(MI) ||
335+
TII.isWaitcnt(MI.getOpcode()))
336+
return false;
337+
return true;
338+
}
339+
~TrivialCostModel() override = default;
340+
};
341+
342+
class BranchWeightCostModel : public CostModelBase {
343+
friend CostModelBase;
311344

345+
BranchProbability BranchProb;
346+
const TargetSchedModel &SchedModel;
347+
uint64_t BranchCost;
348+
uint64_t ThenCyclesCost = 0;
349+
350+
BranchWeightCostModel(const MachineInstr &Branch, const BranchProbability &BP,
351+
const TargetSchedModel &SchedModel)
352+
: BranchProb(BP), SchedModel(SchedModel) {
353+
assert(!BP.isUnknown());
354+
BranchCost = SchedModel.computeInstrLatency(&Branch, false);
355+
}
356+
357+
public:
358+
bool isProfitable(const MachineInstr &MI) override {
359+
ThenCyclesCost += SchedModel.computeInstrLatency(&MI, false);
360+
361+
// Consider `P = N/D` to be the probability of execnz being true
362+
// The transformation is profitable if always executing the 'then' block
363+
// is cheaper than executing sometimes 'then' and always
364+
// executing s_cbranch_execnz:
365+
// * ThenCost <= P*ThenCost + BranchCost
366+
// * (1-P) * ThenCost <= BranchCost
367+
// * (D-N)/D * ThenCost <= BranchCost
368+
uint64_t Numerator = BranchProb.getNumerator();
369+
uint64_t Denominator = BranchProb.getDenominator();
370+
return (Denominator - Numerator) * ThenCyclesCost <=
371+
Denominator * BranchCost;
372+
}
373+
~BranchWeightCostModel() override = default;
374+
};
375+
376+
std::unique_ptr<CostModelBase>
377+
CostModelBase::Create(const MachineBasicBlock &Head,
378+
const MachineBasicBlock &Succ, const SIInstrInfo &TII) {
379+
const auto *FromIt = find(Head.successors(), &Succ);
380+
assert(FromIt != Head.succ_end());
381+
382+
BranchProbability ExecNZProb = Head.getSuccProbability(FromIt);
383+
const auto &SchedModel = TII.getSchedModel();
384+
if (!ExecNZProb.isUnknown()) {
385+
return std::unique_ptr<CostModelBase>(new BranchWeightCostModel(
386+
*Head.getFirstTerminator(), ExecNZProb, SchedModel));
387+
}
388+
389+
return std::unique_ptr<CostModelBase>(new TrivialCostModel(TII));
390+
}
391+
392+
bool SIPreEmitPeephole::mustRetainExeczBranch(
393+
const MachineBasicBlock &Head, const MachineBasicBlock &From,
394+
const MachineBasicBlock &To) const {
395+
396+
auto CostModel = CostModelBase::Create(Head, From, *TII);
397+
398+
const MachineFunction *MF = From.getParent();
312399
for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
313400
MBBI != End && MBBI != ToI; ++MBBI) {
314401
const MachineBasicBlock &MBB = *MBBI;
@@ -326,19 +413,14 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
326413
if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
327414
return true;
328415

329-
// These instructions are potentially expensive even if EXEC = 0.
330-
if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
331-
TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
332-
return true;
333-
334-
++NumInstr;
335-
if (NumInstr >= SkipThreshold)
416+
if (!CostModel->isProfitable(MI))
336417
return true;
337418
}
338419
}
339420

340421
return false;
341422
}
423+
} // namespace
342424

343425
// Returns true if the skip branch instruction is removed.
344426
bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
@@ -351,8 +433,11 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
351433
return false;
352434

353435
// Consider only the forward branches.
354-
if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
355-
mustRetainExeczBranch(*FalseMBB, *TrueMBB))
436+
if (SrcMBB.getNumber() >= TrueMBB->getNumber())
437+
return false;
438+
439+
// Consider only when it is legal and profitable
440+
if (mustRetainExeczBranch(SrcMBB, *FalseMBB, *TrueMBB))
356441
return false;
357442

358443
LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);

llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1726,7 +1726,6 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
17261726
; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0
17271727
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
17281728
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
1729-
; GFX90A-NEXT: s_cbranch_execz .LBB59_2
17301729
; GFX90A-NEXT: ; %bb.1:
17311730
; GFX90A-NEXT: s_load_dword s2, s[2:3], 0x24
17321731
; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
@@ -1736,7 +1735,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
17361735
; GFX90A-NEXT: v_mov_b32_e32 v2, s2
17371736
; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
17381737
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1739-
; GFX90A-NEXT: .LBB59_2:
1738+
; GFX90A-NEXT: ; %bb.2:
17401739
; GFX90A-NEXT: s_endpgm
17411740
;
17421741
; GFX940-LABEL: local_atomic_fadd_f64_noret_pat:
@@ -1747,7 +1746,6 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
17471746
; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0
17481747
; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
17491748
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1750-
; GFX940-NEXT: s_cbranch_execz .LBB59_2
17511749
; GFX940-NEXT: ; %bb.1:
17521750
; GFX940-NEXT: s_load_dword s2, s[2:3], 0x24
17531751
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
@@ -1757,7 +1755,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
17571755
; GFX940-NEXT: v_mov_b32_e32 v2, s2
17581756
; GFX940-NEXT: ds_add_f64 v2, v[0:1]
17591757
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1760-
; GFX940-NEXT: .LBB59_2:
1758+
; GFX940-NEXT: ; %bb.2:
17611759
; GFX940-NEXT: s_endpgm
17621760
main_body:
17631761
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
@@ -1773,7 +1771,6 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
17731771
; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0
17741772
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
17751773
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
1776-
; GFX90A-NEXT: s_cbranch_execz .LBB60_2
17771774
; GFX90A-NEXT: ; %bb.1:
17781775
; GFX90A-NEXT: s_load_dword s2, s[2:3], 0x24
17791776
; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
@@ -1783,7 +1780,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
17831780
; GFX90A-NEXT: v_mov_b32_e32 v2, s2
17841781
; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
17851782
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1786-
; GFX90A-NEXT: .LBB60_2:
1783+
; GFX90A-NEXT: ; %bb.2:
17871784
; GFX90A-NEXT: s_endpgm
17881785
;
17891786
; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush:
@@ -1794,7 +1791,6 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
17941791
; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0
17951792
; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
17961793
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1797-
; GFX940-NEXT: s_cbranch_execz .LBB60_2
17981794
; GFX940-NEXT: ; %bb.1:
17991795
; GFX940-NEXT: s_load_dword s2, s[2:3], 0x24
18001796
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
@@ -1804,7 +1800,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
18041800
; GFX940-NEXT: v_mov_b32_e32 v2, s2
18051801
; GFX940-NEXT: ds_add_f64 v2, v[0:1]
18061802
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1807-
; GFX940-NEXT: .LBB60_2:
1803+
; GFX940-NEXT: ; %bb.2:
18081804
; GFX940-NEXT: s_endpgm
18091805
main_body:
18101806
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
@@ -1820,7 +1816,6 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
18201816
; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0
18211817
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
18221818
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
1823-
; GFX90A-NEXT: s_cbranch_execz .LBB61_2
18241819
; GFX90A-NEXT: ; %bb.1:
18251820
; GFX90A-NEXT: s_load_dword s2, s[2:3], 0x24
18261821
; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
@@ -1830,7 +1825,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
18301825
; GFX90A-NEXT: v_mov_b32_e32 v2, s2
18311826
; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
18321827
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1833-
; GFX90A-NEXT: .LBB61_2:
1828+
; GFX90A-NEXT: ; %bb.2:
18341829
; GFX90A-NEXT: s_endpgm
18351830
;
18361831
; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
@@ -1841,7 +1836,6 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
18411836
; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0
18421837
; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
18431838
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1844-
; GFX940-NEXT: s_cbranch_execz .LBB61_2
18451839
; GFX940-NEXT: ; %bb.1:
18461840
; GFX940-NEXT: s_load_dword s2, s[2:3], 0x24
18471841
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
@@ -1851,7 +1845,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
18511845
; GFX940-NEXT: v_mov_b32_e32 v2, s2
18521846
; GFX940-NEXT: ds_add_f64 v2, v[0:1]
18531847
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1854-
; GFX940-NEXT: .LBB61_2:
1848+
; GFX940-NEXT: ; %bb.2:
18551849
; GFX940-NEXT: s_endpgm
18561850
main_body:
18571851
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0

llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -526,21 +526,19 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
526526
; GFX10-NEXT: v_cmp_ge_u64_e32 vcc_lo, 0, v[2:3]
527527
; GFX10-NEXT: s_and_saveexec_b32 s0, vcc_lo
528528
; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
529-
; GFX10-NEXT: s_cbranch_execz .LBB10_2
530529
; GFX10-NEXT: ; %bb.1: ; %else
531530
; GFX10-NEXT: s_waitcnt vmcnt(0)
532531
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s1, v2, v4, 0
533532
; GFX10-NEXT: v_mad_u64_u32 v[1:2], s1, v2, v5, v[1:2]
534533
; GFX10-NEXT: ; implicit-def: $vgpr2_vgpr3
535534
; GFX10-NEXT: ; implicit-def: $vgpr4_vgpr5
536-
; GFX10-NEXT: .LBB10_2: ; %Flow
535+
; GFX10-NEXT: ; %bb.2: ; %Flow
537536
; GFX10-NEXT: s_andn2_saveexec_b32 s0, s0
538-
; GFX10-NEXT: s_cbranch_execz .LBB10_4
539537
; GFX10-NEXT: ; %bb.3: ; %if
540538
; GFX10-NEXT: s_waitcnt vmcnt(0)
541539
; GFX10-NEXT: v_mul_lo_u32 v1, v2, v5
542540
; GFX10-NEXT: v_mov_b32_e32 v0, 0
543-
; GFX10-NEXT: .LBB10_4: ; %endif
541+
; GFX10-NEXT: ; %bb.4: ; %endif
544542
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
545543
; GFX10-NEXT: v_mov_b32_e32 v2, 0
546544
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
@@ -563,7 +561,6 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
563561
; GFX11-NEXT: s_waitcnt vmcnt(1)
564562
; GFX11-NEXT: v_cmpx_ge_u64_e32 0, v[2:3]
565563
; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0
566-
; GFX11-NEXT: s_cbranch_execz .LBB10_2
567564
; GFX11-NEXT: ; %bb.1: ; %else
568565
; GFX11-NEXT: s_waitcnt vmcnt(0)
569566
; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, v4, 0
@@ -572,14 +569,13 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
572569
; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5
573570
; GFX11-NEXT: v_mov_b32_e32 v1, v3
574571
; GFX11-NEXT: ; implicit-def: $vgpr2_vgpr3
575-
; GFX11-NEXT: .LBB10_2: ; %Flow
572+
; GFX11-NEXT: ; %bb.2: ; %Flow
576573
; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0
577-
; GFX11-NEXT: s_cbranch_execz .LBB10_4
578574
; GFX11-NEXT: ; %bb.3: ; %if
579575
; GFX11-NEXT: s_waitcnt vmcnt(0)
580576
; GFX11-NEXT: v_mul_lo_u32 v1, v2, v5
581577
; GFX11-NEXT: v_mov_b32_e32 v0, 0
582-
; GFX11-NEXT: .LBB10_4: ; %endif
578+
; GFX11-NEXT: ; %bb.4: ; %endif
583579
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
584580
; GFX11-NEXT: v_mov_b32_e32 v2, 0
585581
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]

llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,6 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
292292
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293293
; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0
294294
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
295-
; GFX9-NEXT: s_cbranch_execz .LBB5_2
296295
; GFX9-NEXT: ; %bb.1: ; %if.then
297296
; GFX9-NEXT: s_mov_b32 s11, s18
298297
; GFX9-NEXT: s_mov_b32 s10, s17
@@ -301,7 +300,7 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
301300
; GFX9-NEXT: v_mov_b32_e32 v0, s6
302301
; GFX9-NEXT: v_mov_b32_e32 v1, s19
303302
; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
304-
; GFX9-NEXT: .LBB5_2: ; %if.end
303+
; GFX9-NEXT: ; %bb.2: ; %if.end
305304
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
306305
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307306
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -311,7 +310,6 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
311310
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
312311
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
313312
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
314-
; GFX1010-NEXT: s_cbranch_execz .LBB5_2
315313
; GFX1010-NEXT: ; %bb.1: ; %if.then
316314
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
317315
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
@@ -320,7 +318,7 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
320318
; GFX1010-NEXT: s_mov_b32 s9, s16
321319
; GFX1010-NEXT: s_mov_b32 s8, s7
322320
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
323-
; GFX1010-NEXT: .LBB5_2: ; %if.end
321+
; GFX1010-NEXT: ; %bb.2: ; %if.end
324322
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
325323
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
326324
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -331,7 +329,6 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
331329
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332330
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
333331
; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0
334-
; GFX1030-NEXT: s_cbranch_execz .LBB5_2
335332
; GFX1030-NEXT: ; %bb.1: ; %if.then
336333
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
337334
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
@@ -340,7 +337,7 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
340337
; GFX1030-NEXT: s_mov_b32 s9, s16
341338
; GFX1030-NEXT: s_mov_b32 s8, s7
342339
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
343-
; GFX1030-NEXT: .LBB5_2: ; %if.end
340+
; GFX1030-NEXT: ; %bb.2: ; %if.end
344341
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
345342
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346343
; GFX1030-NEXT: s_setpc_b64 s[30:31]

0 commit comments

Comments
 (0)