@@ -452,7 +452,9 @@ class SIInsertWaitcnts : public MachineFunctionPass {
452
452
// FLAT instruction.
453
453
WaitEventType getVmemWaitEventType (const MachineInstr &Inst) const {
454
454
assert (SIInstrInfo::isVMEM (Inst) || SIInstrInfo::isFLAT (Inst));
455
- if (!ST->hasVscnt ())
455
+ // LDS DMA loads are also stores, but on the LDS side. On the VMEM side
456
+ // these should use VM_CNT.
457
+ if (!ST->hasVscnt () || SIInstrInfo::mayWriteLDSThroughDMA (Inst))
456
458
return VMEM_ACCESS;
457
459
if (Inst.mayStore () && !SIInstrInfo::isAtomicRet (Inst)) {
458
460
// FLAT and SCRATCH instructions may access scratch. Other VMEM
@@ -544,14 +546,6 @@ void WaitcntBrackets::setExpScore(const MachineInstr *MI,
544
546
}
545
547
}
546
548
547
- // MUBUF and FLAT LDS DMA operations need a wait on vmcnt before LDS written
548
- // can be accessed. A load from LDS to VMEM does not need a wait.
549
- static bool mayWriteLDSThroughDMA (const MachineInstr &MI) {
550
- return SIInstrInfo::isVALU (MI) &&
551
- (SIInstrInfo::isMUBUF (MI) || SIInstrInfo::isFLAT (MI)) &&
552
- MI.getOpcode () != AMDGPU::BUFFER_STORE_LDS_DWORD;
553
- }
554
-
555
549
void WaitcntBrackets::updateByEvent (const SIInstrInfo *TII,
556
550
const SIRegisterInfo *TRI,
557
551
const MachineRegisterInfo *MRI,
@@ -703,7 +697,10 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
703
697
setRegScore (RegNo, T, CurrScore);
704
698
}
705
699
}
706
- if (Inst.mayStore () && (TII->isDS (Inst) || mayWriteLDSThroughDMA (Inst))) {
700
+ if (Inst.mayStore () &&
701
+ (TII->isDS (Inst) || TII->mayWriteLDSThroughDMA (Inst))) {
702
+ // MUBUF and FLAT LDS DMA operations need a wait on vmcnt before the LDS
703
+ // they wrote can be accessed. A load from LDS to VMEM does not need a wait.
707
704
setRegScore (SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS, T, CurrScore);
708
705
}
709
706
}
@@ -1178,7 +1175,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
1178
1175
if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::FLAT_ADDRESS)
1179
1176
continue ;
1180
1177
// No need to wait before load from VMEM to LDS.
1181
- if (mayWriteLDSThroughDMA (MI))
1178
+ if (TII-> mayWriteLDSThroughDMA (MI))
1182
1179
continue ;
1183
1180
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
1184
1181
// VM_CNT is only relevant to vgpr or LDS.
0 commit comments