@@ -190,6 +190,21 @@ class AMDGPUWaitSGPRHazards {
190
190
return Mask;
191
191
}
192
192
193
+ bool mergeSubsequentWaitAlus (MachineBasicBlock::instr_iterator &MI,
194
+ unsigned Mask) {
195
+ auto MBB = MI->getParent ();
196
+ if (MI != MBB->instr_begin ()) {
197
+ MachineBasicBlock::instr_iterator It = std::prev (MI);
198
+ while (It != MBB->instr_begin () && It->isDebugInstr ())
199
+ --It;
200
+ if (It->getOpcode () == AMDGPU::S_WAITCNT_DEPCTR) {
201
+ It->getOperand (0 ).setImm (mergeMasks (Mask, It->getOperand (0 ).getImm ()));
202
+ return true ;
203
+ }
204
+ }
205
+ return false ;
206
+ }
207
+
193
208
bool runOnMachineBasicBlock (MachineBasicBlock &MBB, bool Emit) {
194
209
enum { WA_VALU = 0x1 , WA_SALU = 0x2 , WA_VCC = 0x4 };
195
210
@@ -388,21 +403,12 @@ class AMDGPUWaitSGPRHazards {
388
403
Mask = AMDGPU::DepCtr::encodeFieldVaSdst (Mask, 0 );
389
404
}
390
405
if (Emit) {
391
- if (MI != MBB.instr_begin ()) {
392
- MachineBasicBlock::instr_iterator It = std::prev (MI);
393
- while (It != MBB.instr_begin () && It->isDebugInstr ())
394
- --It;
395
- if (It->getOpcode () == AMDGPU::S_WAITCNT_DEPCTR) {
396
- Mask = mergeMasks (Mask, It->getOperand (0 ).getImm ());
397
- It->getOperand (0 ).setImm (Mask);
398
- continue ;
399
- }
406
+ if (!mergeSubsequentWaitAlus (MI, Mask)) {
407
+ auto NewMI = BuildMI (MBB, MI, MI->getDebugLoc (),
408
+ TII->get (AMDGPU::S_WAITCNT_DEPCTR))
409
+ .addImm (Mask);
410
+ updateGetPCBundle (NewMI);
400
411
}
401
-
402
- auto NewMI = BuildMI (MBB, MI, MI->getDebugLoc (),
403
- TII->get (AMDGPU::S_WAITCNT_DEPCTR))
404
- .addImm (Mask);
405
- updateGetPCBundle (NewMI);
406
412
Emitted = true ;
407
413
}
408
414
}
0 commit comments