Skip to content

Commit a3a3e69

Browse files
authored
[AMDGPU] Rewrite GFX12 SGPR hazard handling to dedicated pass (#118750)
- Algorithm operates over whole IR to attempt to minimize waits. - Add support for VALU->VALU SGPR hazards via VA_SDST/VA_VCC.
1 parent 59613ac commit a3a3e69

File tree

98 files changed

+2951
-2095
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

98 files changed

+2951
-2095
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1709,6 +1709,20 @@ The AMDGPU backend supports the following LLVM IR attributes.
17091709
as hidden. Hidden arguments are managed by the compiler and are not part of
17101710
the explicit arguments supplied by the user.
17111711

1712+
"amdgpu-sgpr-hazard-wait" Disables SGPR hazard wait insertion if set to 0.
1713+
Exists for testing performance impact of SGPR hazard waits only.
1714+
1715+
"amdgpu-sgpr-hazard-boundary-cull" Enable insertion of SGPR hazard cull sequences at function call boundaries.
1716+
Cull sequence reduces future hazard waits, but has a performance cost.
1717+
1718+
"amdgpu-sgpr-hazard-mem-wait-cull" Enable insertion of SGPR hazard cull sequences before memory waits.
1719+
Cull sequence reduces future hazard waits, but has a performance cost.
1720+
Attempt to amortize cost by overlapping with memory accesses.
1721+
1722+
"amdgpu-sgpr-hazard-mem-wait-cull-threshold"
1723+
Sets the number of active SGPR hazards that must be present before
1724+
inserting a cull sequence at a memory wait.
1725+
17121726
======================================= ==========================================================
17131727

17141728
Calling Conventions

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,9 @@ void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
463463
void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
464464
extern char &GCNRewritePartialRegUsesID;
465465

466+
void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &);
467+
extern char &AMDGPUWaitSGPRHazardsLegacyID;
468+
466469
namespace AMDGPU {
467470
enum TargetIndex {
468471
TI_CONSTDATA_START,

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "AMDGPUTargetObjectFile.h"
3030
#include "AMDGPUTargetTransformInfo.h"
3131
#include "AMDGPUUnifyDivergentExitNodes.h"
32+
#include "AMDGPUWaitSGPRHazards.h"
3233
#include "GCNDPPCombine.h"
3334
#include "GCNIterativeScheduler.h"
3435
#include "GCNSchedStrategy.h"
@@ -549,6 +550,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
549550
initializeGCNRewritePartialRegUsesPass(*PR);
550551
initializeGCNRegPressurePrinterPass(*PR);
551552
initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
553+
initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);
552554
}
553555

554556
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -1678,6 +1680,8 @@ void GCNPassConfig::addPreEmitPass() {
16781680
// cases.
16791681
addPass(&PostRAHazardRecognizerID);
16801682

1683+
addPass(&AMDGPUWaitSGPRHazardsLegacyID);
1684+
16811685
if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
16821686
addPass(&AMDGPUInsertDelayAluID);
16831687

0 commit comments

Comments
 (0)