Skip to content

Commit b7dbcdf

Browse files
committed
[AMDGPU] Implement workaround for GFX11.5 export priority (llvm#99273)
On GFX11.5, shaders that have completed their exports need to execute/wait at a lower priority than shaders that are still executing exports. Add code to maintain the normal priority of 2 for shaders that export, and to drop to priority 0 after exports. (cherry picked from commit 939a662)
1 parent 80b8245 commit b7dbcdf

File tree

6 files changed

+761
-4
lines changed

6 files changed

+761
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1769,7 +1769,8 @@ def FeatureISAVersion11_Generic: FeatureSet<
17691769
FeatureUserSGPRInit16Bug,
17701770
FeatureMADIntraFwdBug,
17711771
FeaturePrivEnabledTrap2NopBug,
1772-
FeatureRequiresCOV6])>;
1772+
FeatureRequiresCOV6,
1773+
FeatureRequiredExportPriority])>;
17731774

17741775
def FeatureISAVersion11_0_Common : FeatureSet<
17751776
!listconcat(FeatureISAVersion11_Common.Features,
@@ -1799,20 +1800,23 @@ def FeatureISAVersion11_5_0 : FeatureSet<
17991800
!listconcat(FeatureISAVersion11_Common.Features,
18001801
[FeatureSALUFloatInsts,
18011802
FeatureDPPSrc1SGPR,
1802-
FeatureVGPRSingleUseHintInsts])>;
1803+
FeatureVGPRSingleUseHintInsts,
1804+
FeatureRequiredExportPriority])>;
18031805

18041806
def FeatureISAVersion11_5_1 : FeatureSet<
18051807
!listconcat(FeatureISAVersion11_Common.Features,
18061808
[FeatureSALUFloatInsts,
18071809
FeatureDPPSrc1SGPR,
18081810
FeatureVGPRSingleUseHintInsts,
1809-
Feature1_5xVGPRs])>;
1811+
Feature1_5xVGPRs,
1812+
FeatureRequiredExportPriority])>;
18101813

18111814
def FeatureISAVersion11_5_2 : FeatureSet<
18121815
!listconcat(FeatureISAVersion11_Common.Features,
18131816
[FeatureSALUFloatInsts,
18141817
FeatureDPPSrc1SGPR,
1815-
FeatureVGPRSingleUseHintInsts])>;
1818+
FeatureVGPRSingleUseHintInsts,
1819+
FeatureRequiredExportPriority])>;
18161820

18171821
def FeatureISAVersion11_5_3 : FeatureSet<
18181822
!listconcat(FeatureISAVersion11_Common.Features,

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "GCNSubtarget.h"
1515
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1616
#include "SIMachineFunctionInfo.h"
17+
#include "llvm/CodeGen/MachineFrameInfo.h"
1718
#include "llvm/CodeGen/MachineFunction.h"
1819
#include "llvm/CodeGen/ScheduleDAG.h"
1920
#include "llvm/TargetParser/TargetParser.h"
@@ -1203,6 +1204,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
12031204
fixWMMAHazards(MI);
12041205
fixShift64HighRegBug(MI);
12051206
fixVALUMaskWriteHazard(MI);
1207+
fixRequiredExportPriority(MI);
12061208
}
12071209

12081210
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3067,3 +3069,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
30673069

30683070
return true;
30693071
}
3072+
3073+
// Make sure the entry block of \p MF starts with an S_SETPRIO whose priority
// is at least \p Priority.
//
// If the first instruction of the entry block is already an S_SETPRIO with an
// immediate >= \p Priority, nothing is changed and false is returned.
// Otherwise a new S_SETPRIO with immediate \p Priority is inserted at the very
// beginning of the entry block (any existing lower-priority S_SETPRIO is left
// in place after it) and true is returned.
static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
3074+
const SIInstrInfo &TII) {
3075+
MachineBasicBlock &EntryMBB = MF->front();
3076+
// Only inspect the first instruction when the entry block is non-empty.
if (EntryMBB.begin() != EntryMBB.end()) {
3077+
auto &EntryMI = *EntryMBB.begin();
3078+
if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
3079+
EntryMI.getOperand(0).getImm() >= Priority)
3080+
return false;
3081+
}
3082+
3083+
// The inserted instruction carries an empty DebugLoc.
BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
3084+
.addImm(Priority);
3085+
return true;
3086+
}
3087+
3088+
// Apply the required-export-priority workaround for \p MI on subtargets that
// report hasRequiredExportPriority().
//
// Depending on the opcode of \p MI:
//  - S_ENDPGM / S_ENDPGM_SAVED / S_ENDPGM_ORDERED_PS_DONE /
//    SI_RETURN_TO_EPILOG: if the function has calls, ensure the entry block
//    starts with an S_SETPRIO of at least NormalPriority.
//  - S_SETPRIO: raise priorities below NormalPriority by NormalPriority
//    (clamped to MaxPriority), except for a PostExportPriority S_SETPRIO that
//    directly follows an export.
//  - Export: ensure the entry S_SETPRIO (except for amdgpu_gfx functions) and,
//    after the last export of a consecutive run, insert the priority-lowering
//    sequence built at the end of this function.
// Any other instruction is left alone.
//
// \returns true if an instruction was inserted or modified, false otherwise.
bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
3089+
if (!ST.hasRequiredExportPriority())
3090+
return false;
3091+
3092+
// Assume the following shader types will never have exports,
3093+
// and avoid adding or adjusting S_SETPRIO.
3094+
MachineBasicBlock *MBB = MI->getParent();
3095+
MachineFunction *MF = MBB->getParent();
3096+
auto CC = MF->getFunction().getCallingConv();
3097+
switch (CC) {
3098+
case CallingConv::AMDGPU_CS:
3099+
case CallingConv::AMDGPU_CS_Chain:
3100+
case CallingConv::AMDGPU_CS_ChainPreserve:
3101+
case CallingConv::AMDGPU_KERNEL:
3102+
return false;
3103+
default:
3104+
break;
3105+
}
3106+
3107+
// Priority immediates used by the workaround.
const int MaxPriority = 3;
3108+
const int NormalPriority = 2;
3109+
const int PostExportPriority = 0;
3110+
3111+
auto It = MI->getIterator();
3112+
switch (MI->getOpcode()) {
3113+
case AMDGPU::S_ENDPGM:
3114+
case AMDGPU::S_ENDPGM_SAVED:
3115+
case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
3116+
case AMDGPU::SI_RETURN_TO_EPILOG:
3117+
// Ensure shader with calls raises priority at entry.
3118+
// This ensures correct priority if exports exist in callee.
3119+
if (MF->getFrameInfo().hasCalls())
3120+
return ensureEntrySetPrio(MF, NormalPriority, TII);
3121+
return false;
3122+
case AMDGPU::S_SETPRIO: {
3123+
// Raise minimum priority unless in workaround.
3124+
auto &PrioOp = MI->getOperand(0);
3125+
int Prio = PrioOp.getImm();
3126+
// "In workaround" means: this is the PostExportPriority S_SETPRIO that
// immediately follows an export instruction in the same block.
bool InWA = (Prio == PostExportPriority) &&
3127+
(It != MBB->begin() && TII.isEXP(*std::prev(It)));
3128+
if (InWA || Prio >= NormalPriority)
3129+
return false;
3130+
// Shift the requested priority up by NormalPriority, clamped to MaxPriority.
PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
3131+
return true;
3132+
}
3133+
default:
3134+
// Everything below applies to export instructions only.
if (!TII.isEXP(*MI))
3135+
return false;
3136+
break;
3137+
}
3138+
3139+
// Check entry priority at each export (as there will only be a few).
3140+
// Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
3141+
bool Changed = false;
3142+
if (CC != CallingConv::AMDGPU_Gfx)
3143+
Changed = ensureEntrySetPrio(MF, NormalPriority, TII);
3144+
3145+
auto NextMI = std::next(It);
3146+
// EndOfShader stays false when the export is the last instruction in its
// block, so the full sequence (including the wait and the priority restore)
// is emitted in that case.
bool EndOfShader = false;
3147+
if (NextMI != MBB->end()) {
3148+
// Only need WA at end of sequence of exports.
3149+
if (TII.isEXP(*NextMI))
3150+
return Changed;
3151+
// Assume appropriate S_SETPRIO after export means WA already applied.
3152+
if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
3153+
NextMI->getOperand(0).getImm() == PostExportPriority)
3154+
return Changed;
3155+
// Only a plain S_ENDPGM directly after the export counts as end of shader
// here; the other program-end opcodes handled above are not checked.
EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
3156+
}
3157+
3158+
// All inserted instructions reuse the export's debug location and are placed
// immediately before NextMI, i.e. directly after the export, in program order.
const DebugLoc &DL = MI->getDebugLoc();
3159+
3160+
// Lower priority.
3161+
BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
3162+
.addImm(PostExportPriority);
3163+
3164+
if (!EndOfShader) {
3165+
// Wait for exports to complete.
3166+
BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
3167+
.addReg(AMDGPU::SGPR_NULL)
3168+
.addImm(0);
3169+
}
3170+
3171+
// Two S_NOP 0 instructions are always emitted after the priority drop.
BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
3172+
BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
3173+
3174+
if (!EndOfShader) {
3175+
// Return to normal (higher) priority.
3176+
BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
3177+
.addImm(NormalPriority);
3178+
}
3179+
3180+
return true;
3181+
}

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
107107
bool fixWMMAHazards(MachineInstr *MI);
108108
bool fixShift64HighRegBug(MachineInstr *MI);
109109
bool fixVALUMaskWriteHazard(MachineInstr *MI);
110+
bool fixRequiredExportPriority(MachineInstr *MI);
110111

111112
int checkMAIHazards(MachineInstr *MI);
112113
int checkMAIHazards908(MachineInstr *MI);

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1323,6 +1323,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13231323

13241324
bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
13251325

1326+
/// \returns the value of the HasRequiredExportPriority subtarget flag.
bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
1327+
13261328
/// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
13271329
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
13281330
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }

0 commit comments

Comments
 (0)