Skip to content

Commit caee918

Browse files
shiltian authored and kzhuravl committed
[AMDGPU] Rewrite GFX12 SGPR hazard handling to dedicated pass (llvm#118750)
- Algorithm operates over whole IR to attempt to minimize waits.
- Add support for VALU->VALU SGPR hazards via VA_SDST/VA_VCC.
(cherry picked from commit a3a3e69)
1 parent f014129 commit caee918

File tree

99 files changed

+2806
-4190
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

99 files changed

+2806
-4190
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1697,6 +1697,24 @@ The AMDGPU backend supports the following LLVM IR attributes.
16971697
function which requires AGPRs is reached through any function marked
16981698
with this attribute.
16991699

1700+
"amdgpu-hidden-argument" This attribute is used internally by the backend to mark function arguments
1701+
as hidden. Hidden arguments are managed by the compiler and are not part of
1702+
the explicit arguments supplied by the user.
1703+
1704+
"amdgpu-sgpr-hazard-wait" Disables SGPR hazard wait insertion if set to 0.
1705+
Exists for testing performance impact of SGPR hazard waits only.
1706+
1707+
"amdgpu-sgpr-hazard-boundary-cull" Enable insertion of SGPR hazard cull sequences at function call boundaries.
1708+
Cull sequence reduces future hazard waits, but has a performance cost.
1709+
1710+
"amdgpu-sgpr-hazard-mem-wait-cull" Enable insertion of SGPR hazard cull sequences before memory waits.
1711+
Cull sequence reduces future hazard waits, but has a performance cost.
1712+
Attempts to amortize the cost by overlapping with memory accesses.
1713+
1714+
"amdgpu-sgpr-hazard-mem-wait-cull-threshold"
1715+
Sets the number of active SGPR hazards that must be present before
1716+
inserting a cull sequence at a memory wait.
1717+
17001718
======================================= ==========================================================
17011719

17021720
Calling Conventions

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,9 @@ void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
421421
void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
422422
extern char &GCNRewritePartialRegUsesID;
423423

424+
void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &);
425+
extern char &AMDGPUWaitSGPRHazardsLegacyID;
426+
424427
namespace AMDGPU {
425428
enum TargetIndex {
426429
TI_CONSTDATA_START,

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "AMDGPUTargetObjectFile.h"
2727
#include "AMDGPUTargetTransformInfo.h"
2828
#include "AMDGPUUnifyDivergentExitNodes.h"
29+
#include "AMDGPUWaitSGPRHazards.h"
2930
#include "GCNIterativeScheduler.h"
3031
#include "GCNSchedStrategy.h"
3132
#include "GCNVOPDUtils.h"
@@ -521,6 +522,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
521522
initializeGCNPreRALongBranchRegPass(*PR);
522523
initializeGCNRewritePartialRegUsesPass(*PR);
523524
initializeGCNRegPressurePrinterPass(*PR);
525+
initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);
524526
}
525527

526528
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -1623,6 +1625,8 @@ void GCNPassConfig::addPreEmitPass() {
16231625
if (isPassEnabled(EnableInsertSingleUseVDST, CodeGenOptLevel::Less))
16241626
addPass(&AMDGPUInsertSingleUseVDSTID);
16251627

1628+
addPass(&AMDGPUWaitSGPRHazardsLegacyID);
1629+
16261630
if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
16271631
addPass(&AMDGPUInsertDelayAluID);
16281632

0 commit comments

Comments
 (0)