@@ -2873,9 +2873,20 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
2873
2873
2874
2874
auto I = MBB.end ();
2875
2875
2876
+ // Note: this is used after the hazard recognizer, so hazard workarounds
2877
+ // must be applied directly.
2878
+ const bool FlushSGPRWrites = (ST.isWave64 () && ST.hasVALUMaskWriteHazard ()) ||
2879
+ ST.hasVALUReadSGPRHazard ();
2880
+ auto ApplyHazardWorkarounds = [this , &MBB, &I, &DL, FlushSGPRWrites]() {
2881
+ if (FlushSGPRWrites)
2882
+ BuildMI (MBB, I, DL, get (AMDGPU::S_WAITCNT_DEPCTR))
2883
+ .addImm (AMDGPU::DepCtr::encodeFieldSaSdst (0 ));
2884
+ };
2885
+
2876
2886
// We need to compute the offset relative to the instruction immediately after
2877
2887
// s_getpc_b64. Insert pc arithmetic code before last terminator.
2878
2888
MachineInstr *GetPC = BuildMI (MBB, I, DL, get (AMDGPU::S_GETPC_B64), PCReg);
2889
+ ApplyHazardWorkarounds ();
2879
2890
2880
2891
auto &MCCtx = MF->getContext ();
2881
2892
MCSymbol *PostGetPCLabel =
@@ -2894,6 +2905,7 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
2894
2905
.addReg (PCReg, RegState::Define, AMDGPU::sub1)
2895
2906
.addReg (PCReg, 0 , AMDGPU::sub1)
2896
2907
.addSym (OffsetHi, MO_FAR_BRANCH_OFFSET);
2908
+ ApplyHazardWorkarounds ();
2897
2909
2898
2910
// Insert the indirect branch after the other terminator.
2899
2911
BuildMI (&MBB, DL, get (AMDGPU::S_SETPC_B64))
0 commit comments