Skip to content

Commit 9f996e3

Browse files
author
git apple-llvm automerger
committed
Merge commit 'ebdcef20ce29' from llvm.org/master into apple/main
2 parents d24aabc + ebdcef2 commit 9f996e3

12 files changed

+70
-266
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 26 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -138,67 +138,71 @@ static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
138138
ScheduleHazardRecognizer::HazardType
139139
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
140140
MachineInstr *MI = SU->getInstr();
141+
// If we are not in "HazardRecognizerMode" and therefore not being run from
142+
// the scheduler, track possible stalls from hazards but don't insert noops.
143+
auto HazardType = IsHazardRecognizerMode ? NoopHazard : Hazard;
144+
141145
if (MI->isBundle())
142146
return NoHazard;
143147

144148
if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
145-
return NoopHazard;
149+
return HazardType;
146150

147151
// FIXME: Should flat be considered vmem?
148152
if ((SIInstrInfo::isVMEM(*MI) ||
149153
SIInstrInfo::isFLAT(*MI))
150154
&& checkVMEMHazards(MI) > 0)
151-
return NoopHazard;
155+
return HazardType;
152156

153157
if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
154-
return NoopHazard;
158+
return HazardType;
155159

156160
if (checkFPAtomicToDenormModeHazard(MI) > 0)
157-
return NoopHazard;
161+
return HazardType;
158162

159163
if (ST.hasNoDataDepHazard())
160164
return NoHazard;
161165

162166
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
163-
return NoopHazard;
167+
return HazardType;
164168

165169
if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
166-
return NoopHazard;
170+
return HazardType;
167171

168172
if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
169-
return NoopHazard;
173+
return HazardType;
170174

171175
if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
172-
return NoopHazard;
176+
return HazardType;
173177

174178
if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
175-
return NoopHazard;
179+
return HazardType;
176180

177181
if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
178-
return NoopHazard;
182+
return HazardType;
179183

180184
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
181-
return NoopHazard;
185+
return HazardType;
182186

183187
if (ST.hasReadM0MovRelInterpHazard() &&
184188
(TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
185189
checkReadM0Hazards(MI) > 0)
186-
return NoopHazard;
190+
return HazardType;
187191

188192
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
189193
checkReadM0Hazards(MI) > 0)
190-
return NoopHazard;
194+
return HazardType;
191195

192196
if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
193-
return NoopHazard;
197+
return HazardType;
194198

195199
if ((SIInstrInfo::isVMEM(*MI) ||
196200
SIInstrInfo::isFLAT(*MI) ||
197201
SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
198-
return NoopHazard;
202+
return HazardType;
199203

200204
if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
201-
return NoopHazard;
205+
return HazardType;
202206

203207
return NoHazard;
204208
}
@@ -312,15 +316,19 @@ void GCNHazardRecognizer::EmitNoop() {
312316
void GCNHazardRecognizer::AdvanceCycle() {
313317
// When the scheduler detects a stall, it will call AdvanceCycle() without
314318
// emitting any instructions.
315-
if (!CurrCycleInstr)
319+
if (!CurrCycleInstr) {
320+
EmittedInstrs.push_front(nullptr);
316321
return;
322+
}
317323

318324
// Do not track non-instructions which do not affect the wait states.
319325
// If included, these instructions can lead to buffer overflow such that
320326
// detectable hazards are missed.
321327
if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
322-
CurrCycleInstr->isKill())
328+
CurrCycleInstr->isKill()) {
329+
CurrCycleInstr = nullptr;
323330
return;
331+
}
324332

325333
if (CurrCycleInstr->isBundle()) {
326334
processBundle();

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 0 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -602,35 +602,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
602602
}
603603
}
604604

605-
// Combine adjacent s_nops to use the immediate operand encoding how long
606-
// to wait.
607-
//
608-
// s_nop N
609-
// s_nop M
610-
// =>
611-
// s_nop (N + M)
612-
if (MI.getOpcode() == AMDGPU::S_NOP &&
613-
MI.getNumOperands() == 1 && // Don't merge with implicit operands
614-
Next != MBB.end() &&
615-
(*Next).getOpcode() == AMDGPU::S_NOP &&
616-
(*Next).getNumOperands() == 1) {
617-
618-
MachineInstr &NextMI = *Next;
619-
// The instruction encodes the amount to wait with an offset of 1,
620-
// i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
621-
// after adding.
622-
uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
623-
uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;
624-
625-
// Make sure we don't overflow the bounds.
626-
if (Nop0 + Nop1 <= 8) {
627-
NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
628-
MI.eraseFromParent();
629-
}
630-
631-
continue;
632-
}
633-
634605
// FIXME: We also need to consider movs of constant operands since
635606
// immediate operands are not folded if they have more than one use, and
636607
// the operand folding pass is unaware if the immediate will be free since

llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(<4 x i128> addrspace(
88
; GFX9: ; %bb.0:
99
; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0
1010
; GFX9-NEXT: s_lshl_b32 m0, s4, 1
11-
; GFX9-NEXT: s_nop 0
1211
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1312
; GFX9-NEXT: s_movrels_b64 s[0:1], s[8:9]
1413
; GFX9-NEXT: s_movrels_b64 s[2:3], s[10:11]

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -887,8 +887,8 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace
887887
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
888888
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2
889889
; GFX8-NEXT: s_and_b64 vcc, vcc, s[2:3]
890-
; GFX8-NEXT: s_nop 1
891890
; GFX8-NEXT: s_waitcnt vmcnt(0)
891+
; GFX8-NEXT: s_nop 0
892892
; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3
893893
; GFX8-NEXT: v_mov_b32_e32 v0, s0
894894
; GFX8-NEXT: v_mov_b32_e32 v1, s1
@@ -992,8 +992,8 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
992992
; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
993993
; GFX7-NEXT: s_mov_b32 s10, -1
994994
; GFX7-NEXT: s_mov_b64 s[6:7], s[10:11]
995-
; GFX7-NEXT: s_nop 1
996995
; GFX7-NEXT: s_waitcnt vmcnt(0)
996+
; GFX7-NEXT: s_nop 0
997997
; GFX7-NEXT: v_div_fmas_f32 v0, v1, v2, v3
998998
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8
999999
; GFX7-NEXT: s_endpgm
@@ -1026,8 +1026,8 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
10261026
; GFX8-NEXT: s_addc_u32 s1, s5, 0
10271027
; GFX8-NEXT: s_and_b32 s2, 1, s2
10281028
; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
1029-
; GFX8-NEXT: s_nop 3
10301029
; GFX8-NEXT: s_waitcnt vmcnt(0)
1030+
; GFX8-NEXT: s_nop 2
10311031
; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3
10321032
; GFX8-NEXT: v_mov_b32_e32 v0, s0
10331033
; GFX8-NEXT: v_mov_b32_e32 v1, s1

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,10 +48,10 @@ define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i
4848
; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
4949
; GFX8-NEXT: v_mov_b32_e32 v5, s3
5050
; GFX8-NEXT: v_mov_b32_e32 v4, s2
51-
; GFX8-NEXT: s_nop 0
5251
; GFX8-NEXT: s_waitcnt vmcnt(0)
53-
; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
52+
; GFX8-NEXT: s_nop 0
5453
; GFX8-NEXT: v_mov_b32_dpp v4, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
54+
; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
5555
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
5656
; GFX8-NEXT: s_endpgm
5757
;

0 commit comments

Comments
 (0)