Skip to content

Commit 92ae8f9

Browse files
committed
[AMDGPU] Add target hook to isGlobalMemoryObject
We want special handling for IGLP instructions in the scheduler, but they should still be treated as having side effects by other passes. Add a target hook to the ScheduleDAGInstrs DAG builder so that we have more control over this.
1 parent 4512bbe commit 92ae8f9

File tree

8 files changed

+84
-36
lines changed

8 files changed

+84
-36
lines changed

llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,10 @@ namespace llvm {
374374
void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
375375
void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
376376

377+
/// Returns true if MI is an instruction we are unable to reason about
378+
/// (like a call or something with unmodeled side effects).
379+
virtual bool isGlobalMemoryObject(MachineInstr *MI);
380+
377381
/// Returns a mask for which lanes get read/written by the given (register)
378382
/// machine operand.
379383
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;

llvm/lib/CodeGen/ScheduleDAGInstrs.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
549549

550550
/// Returns true if MI is an instruction we are unable to reason about
551551
/// (like a call or something with unmodeled side effects).
552-
static inline bool isGlobalMemoryObject(MachineInstr *MI) {
552+
bool ScheduleDAGInstrs::isGlobalMemoryObject(MachineInstr *MI) {
553553
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
554554
(MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
555555
}

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -240,23 +240,6 @@ class SchedGroup {
240240
}
241241
};
242242

243-
// Remove all existing edges from a SCHED_BARRIER or SCHED_GROUP_BARRIER.
244-
static void resetEdges(SUnit &SU, ScheduleDAGInstrs *DAG) {
245-
assert(SU.getInstr()->getOpcode() == AMDGPU::SCHED_BARRIER ||
246-
SU.getInstr()->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
247-
SU.getInstr()->getOpcode() == AMDGPU::IGLP_OPT);
248-
249-
while (!SU.Preds.empty())
250-
for (auto &P : SU.Preds)
251-
SU.removePred(P);
252-
253-
while (!SU.Succs.empty())
254-
for (auto &S : SU.Succs)
255-
for (auto &SP : S.getSUnit()->Preds)
256-
if (SP.getSUnit() == &SU)
257-
S.getSUnit()->removePred(SP);
258-
}
259-
260243
using SUToCandSGsPair = std::pair<SUnit *, SmallVector<int, 4>>;
261244
using SUsToCandSGsVec = SmallVector<SUToCandSGsPair, 4>;
262245

@@ -460,7 +443,6 @@ void PipelineSolver::makePipeline() {
460443
// Command line requested IGroupLP doesn't have SGBarr
461444
if (!SGBarr)
462445
continue;
463-
resetEdges(*SGBarr, DAG);
464446
SG.link(*SGBarr, false);
465447
}
466448
}
@@ -2567,7 +2549,6 @@ void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
25672549
initSchedGroupBarrierPipelineStage(R);
25682550
FoundSB = true;
25692551
} else if (Opc == AMDGPU::IGLP_OPT) {
2570-
resetEdges(*R, DAG);
25712552
if (!FoundSB && !FoundIGLP) {
25722553
FoundIGLP = true;
25732554
ShouldApplyIGLP = initIGLPOpt(*R);
@@ -2589,7 +2570,6 @@ void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
25892570
assert(MI.getOpcode() == AMDGPU::SCHED_BARRIER);
25902571
// Remove all existing edges from the SCHED_BARRIER that were added due to the
25912572
// instruction having side effects.
2592-
resetEdges(SchedBarrier, DAG);
25932573
LLVM_DEBUG(dbgs() << "Building SchedGroup for SchedBarrier with Mask: "
25942574
<< MI.getOperand(0).getImm() << "\n");
25952575
auto InvertedMask =
@@ -2647,7 +2627,6 @@ void IGroupLPDAGMutation::initSchedGroupBarrierPipelineStage(
26472627
std::vector<SUnit>::reverse_iterator RIter) {
26482628
// Remove all existing edges from the SCHED_GROUP_BARRIER that were added due
26492629
// to the instruction having side effects.
2650-
resetEdges(*RIter, DAG);
26512630
MachineInstr &SGB = *RIter->getInstr();
26522631
assert(SGB.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER);
26532632
int32_t SGMask = SGB.getOperand(0).getImm();

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "AMDGPUIGroupLP.h"
2828
#include "SIMachineFunctionInfo.h"
2929
#include "llvm/CodeGen/RegisterClassInfo.h"
30+
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
3031

3132
#define DEBUG_TYPE "machine-scheduler"
3233

@@ -1748,18 +1749,43 @@ void GCNScheduleDAGMILive::updateRegionBoundaries(
17481749
}
17491750
}
17501751

1752+
static bool isIGLPInstr(MachineInstr *MI) {
1753+
switch (MI->getOpcode()) {
1754+
case AMDGPU::IGLP_OPT:
1755+
case AMDGPU::SCHED_BARRIER:
1756+
case AMDGPU::SCHED_GROUP_BARRIER:
1757+
return true;
1758+
default:
1759+
return false;
1760+
}
1761+
}
1762+
17511763
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
17521764
return any_of(*DAG, [](MachineBasicBlock::iterator MI) {
17531765
unsigned Opc = MI->getOpcode();
17541766
return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
17551767
});
17561768
}
17571769

1770+
bool GCNScheduleDAGMILive::isGlobalMemoryObject(MachineInstr *MI) {
1771+
if (isIGLPInstr(MI))
1772+
return false;
1773+
1774+
return ScheduleDAGInstrs::isGlobalMemoryObject(MI);
1775+
}
1776+
17581777
GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
17591778
MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
17601779
bool RemoveKillFlags)
17611780
: ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
17621781

1782+
bool GCNPostScheduleDAGMILive::isGlobalMemoryObject(MachineInstr *MI) {
1783+
if (isIGLPInstr(MI))
1784+
return false;
1785+
1786+
return ScheduleDAGInstrs::isGlobalMemoryObject(MI);
1787+
}
1788+
17631789
void GCNPostScheduleDAGMILive::schedule() {
17641790
HasIGLPInstrs = hasIGLPInstrs(this);
17651791
if (HasIGLPInstrs) {

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
285285

286286
std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
287287

288+
bool isGlobalMemoryObject(MachineInstr *MI) override;
289+
288290
public:
289291
GCNScheduleDAGMILive(MachineSchedContext *C,
290292
std::unique_ptr<MachineSchedStrategy> S);
@@ -469,6 +471,8 @@ class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
469471

470472
bool HasIGLPInstrs = false;
471473

474+
bool isGlobalMemoryObject(MachineInstr *MI) override;
475+
472476
public:
473477
void schedule() override;
474478

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.small.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,6 @@
2424
; GCN-NEXT: ; implicit-def: $vgpr79
2525
; GCN-NEXT: ; implicit-def: $vgpr80
2626
; GCN-NEXT: ; implicit-def: $vgpr91
27-
; GCN-NEXT: ;;#ASMSTART
28-
; GCN-NEXT: s_waitcnt vmcnt(8)
29-
; GCN-NEXT: ;;#ASMEND
3027
; GCN-NEXT: ; kill: killed $sgpr16_sgpr17_sgpr18_sgpr19
3128
; GCN-NEXT: ; iglp_opt mask(0x00000002)
3229
; GCN-NEXT: s_nop 1
@@ -476,6 +473,9 @@
476473
; GCN-NEXT: s_waitcnt lgkmcnt(0)
477474
; GCN-NEXT: buffer_inv sc0 sc1
478475
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[4:5], v[8:9], v[32:47]
476+
; GCN-NEXT: ;;#ASMSTART
477+
; GCN-NEXT: s_waitcnt vmcnt(8)
478+
; GCN-NEXT: ;;#ASMEND
479479
; GCN-NEXT: v_mov_b32_e32 v4, 0
480480
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[6:7], v[0:1], v[32:47]
481481
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[2:3], v[0:1], v[48:63]

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,41 @@ entry:
285285
ret void
286286
}
287287

288+
define amdgpu_kernel void @test_iglp_opt_asm_sideeffect(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
289+
; GCN-LABEL: test_iglp_opt_asm_sideeffect:
290+
; GCN: ; %bb.0: ; %entry
291+
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
292+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
293+
; GCN-NEXT: v_and_b32_e32 v0, 0xffc, v0
294+
; GCN-NEXT: ; iglp_opt mask(0x00000000)
295+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
296+
; GCN-NEXT: v_add_u32_e32 v1, s0, v0
297+
; GCN-NEXT: ds_read_b32 v1, v1
298+
; GCN-NEXT: v_add_u32_e32 v0, s1, v0
299+
; GCN-NEXT: v_mov_b32_e32 v2, s0
300+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
301+
; GCN-NEXT: ds_write_b32 v0, v1
302+
; GCN-NEXT: ;;#ASMSTART
303+
; GCN-NEXT: ;;#ASMEND
304+
; GCN-NEXT: ds_read_b32 v0, v2 offset:256
305+
; GCN-NEXT: v_mov_b32_e32 v1, s1
306+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
307+
; GCN-NEXT: ds_write_b32 v1, v0 offset:256
308+
; GCN-NEXT: s_endpgm
309+
entry:
310+
%idx = call i32 @llvm.amdgcn.workitem.id.x()
311+
%load.0.addr = getelementptr float, ptr addrspace(3) %in, i32 %idx
312+
%load.0 = load float, ptr addrspace(3) %load.0.addr
313+
%store.0.addr = getelementptr float, ptr addrspace(3) %out, i32 %idx
314+
store float %load.0, ptr addrspace(3) %store.0.addr
315+
call void asm sideeffect "", ""() #1
316+
call void @llvm.amdgcn.iglp.opt(i32 0) #1
317+
%load.1.addr = getelementptr float, ptr addrspace(3) %in, i32 64
318+
%load.1 = load float, ptr addrspace(3) %load.1.addr
319+
%store.1.addr = getelementptr float, ptr addrspace(3) %out, i32 64
320+
store float %load.1, ptr addrspace(3) %store.1.addr
321+
ret void
322+
}
288323

289324
declare void @llvm.amdgcn.iglp.opt(i32) #1
290325
declare i32 @llvm.amdgcn.workitem.id.x() #1

llvm/test/CodeGen/AMDGPU/sched-barrier-pre-RA.mir

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,10 @@ body: |
9696
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
9797
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
9898
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
99+
; CHECK-NEXT: S_NOP 0
99100
; CHECK-NEXT: SCHED_BARRIER 1
100101
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
101102
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
102-
; CHECK-NEXT: S_NOP 0
103103
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
104104
; CHECK-NEXT: S_ENDPGM 0
105105
%0:sreg_64_xexec_xnull = IMPLICIT_DEF
@@ -163,19 +163,19 @@ body: |
163163
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
164164
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
165165
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
166-
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec
167166
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
168-
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_1:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
167+
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec
169168
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
170-
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
169+
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_1:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
171170
; CHECK-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
172-
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_3:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
171+
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
173172
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
173+
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_3:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
174+
; CHECK-NEXT: S_NOP 0
174175
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_4:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_3]], 0, 0, 0, implicit $mode, implicit $exec
175176
; CHECK-NEXT: SCHED_BARRIER 4
176177
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
177178
; CHECK-NEXT: [[V_MUL_LO_U32_e64_3:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
178-
; CHECK-NEXT: S_NOP 0
179179
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_3]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
180180
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_LO_U32_e64_1]], implicit [[V_MUL_LO_U32_e64_2]], implicit [[V_MFMA_F32_4X4X1F32_e64_4]]
181181
%0:sreg_64_xexec_xnull = IMPLICIT_DEF
@@ -258,10 +258,10 @@ body: |
258258
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
259259
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
260260
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
261-
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
262261
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
263262
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
264263
; CHECK-NEXT: S_NOP 0
264+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
265265
; CHECK-NEXT: SCHED_BARRIER 16
266266
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
267267
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
@@ -290,10 +290,10 @@ body: |
290290
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
291291
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
292292
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
293-
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
294293
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
295294
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
296295
; CHECK-NEXT: S_NOP 0
296+
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
297297
; CHECK-NEXT: SCHED_BARRIER 32
298298
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec
299299
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 512, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
@@ -354,9 +354,9 @@ body: |
354354
; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
355355
; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
356356
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
357-
; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
358357
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
359358
; CHECK-NEXT: S_NOP 0
359+
; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
360360
; CHECK-NEXT: SCHED_BARRIER 128
361361
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
362362
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
@@ -386,9 +386,9 @@ body: |
386386
; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
387387
; CHECK-NEXT: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
388388
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_]], [[DS_READ_U16_gfx9_]], implicit $exec
389-
; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
390389
; CHECK-NEXT: DS_WRITE_B32 [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 16, implicit $m0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 3)
391390
; CHECK-NEXT: S_NOP 0
391+
; CHECK-NEXT: [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 3)
392392
; CHECK-NEXT: SCHED_BARRIER 256
393393
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[DS_READ_U16_gfx9_1]], [[DS_READ_U16_gfx9_1]], implicit $exec
394394
; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
@@ -453,7 +453,6 @@ body: |
453453
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
454454
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)
455455
; CHECK-NEXT: [[DEF2:%[0-9]+]]:areg_128 = IMPLICIT_DEF
456-
; CHECK-NEXT: S_NOP 0
457456
; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
458457
; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF1]], implicit $exec
459458
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[DEF2]], 0, 0, 0, implicit $mode, implicit $exec
@@ -462,6 +461,7 @@ body: |
462461
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 0, 0, implicit $exec :: (store (s32) into %ir.out, !noalias !0, addrspace 1)
463462
; CHECK-NEXT: SCHED_BARRIER 12
464463
; CHECK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_2:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[DEF1]], [[GLOBAL_LOAD_DWORD_SADDR]], [[V_MFMA_F32_4X4X1F32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
464+
; CHECK-NEXT: S_NOP 0
465465
; CHECK-NEXT: SCHED_BARRIER 8
466466
; CHECK-NEXT: S_NOP 0
467467
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32) from %ir.in, !alias.scope !0, addrspace 1)

0 commit comments

Comments
 (0)