Skip to content

Commit b2a7bdc

Browse files
committed
[AMDGPU] Add SubtargetFeature for dynamic VGPR mode
The new `dynamic-vgpr` subtarget feature represents a hardware mode supported only for wave32 compute shaders. When it is enabled, the `.dynamic_vgpr_en` field of `.compute_registers` is set to true in the PAL metadata.
1 parent 3cacd07 commit b2a7bdc

File tree

5 files changed

+26
-5
lines changed

5 files changed

+26
-5
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,12 @@ For example:
758758
enabled will execute correctly but may be less
759759
performant than code generated for XNACK replay
760760
disabled.
761+
762+
dynamic-vgpr TODO Represents the "Dynamic VGPR" hardware mode, introduced in GFX12.
763+
Waves launched in this mode may allocate or deallocate VGPRs
764+
using dedicated instructions, but may not send the DEALLOC_VGPRS
765+
message.
766+
761767
=============== ============================ ==================================================
762768

763769
.. _amdgpu-target-id:

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,6 +1239,12 @@ def FeatureXF32Insts : SubtargetFeature<"xf32-insts",
12391239
"v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32"
12401240
>;
12411241

1242+
def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",
1243+
"DynamicVGPR",
1244+
"true",
1245+
"Enable dynamic VGPR mode"
1246+
>;
1247+
12421248
// Dummy feature used to disable assembler instructions.
12431249
def FeatureDisable : SubtargetFeature<"",
12441250
"FeatureDisable","true",

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1414,6 +1414,9 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
14141414
MD->setHwStage(CC, ".trap_present",
14151415
(bool)CurrentProgramInfo.TrapHandlerEnable);
14161416
MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
1417+
1418+
if (ST.isDynamicVGPREnabled())
1419+
MD->setComputeRegisters(".dynamic_vgpr_en", true);
14171420
}
14181421

14191422
MD->setHwStage(CC, ".lds_size",

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
190190
/// indicates a lack of S_CLAUSE support.
191191
unsigned MaxHardClauseLength = 0;
192192
bool SupportsSRAMECC = false;
193+
bool DynamicVGPR = false;
193194

194195
// This should not be used directly. 'TargetID' tracks the dynamic settings
195196
// for SRAMECC.
@@ -1647,6 +1648,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
16471648
return true;
16481649
}
16491650

1651+
bool isDynamicVGPREnabled() const { return DynamicVGPR; }
1652+
16501653
bool requiresDisjointEarlyClobberAndUndef() const override {
16511654
// AMDGPU doesn't care if early-clobber and undef operands are allocated
16521655
// to the same register.

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s
1+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11
2+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK
3+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr <%s | FileCheck %s --check-prefixes=CHECK,DVGPR
24

35
; CHECK-LABEL: {{^}}_amdgpu_cs_main:
46
; CHECK: ; TotalNumSgprs: 4
@@ -8,6 +10,7 @@
810
; CHECK-NEXT: amdpal.pipelines:
911
; CHECK-NEXT: - .api: Vulkan
1012
; CHECK-NEXT: .compute_registers:
13+
; DVGPR-NEXT: .dynamic_vgpr_en: true
1114
; CHECK-NEXT: .tg_size_en: true
1215
; CHECK-NEXT: .tgid_x_en: false
1316
; CHECK-NEXT: .tgid_y_en: false
@@ -57,7 +60,7 @@
5760
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
5861
; CHECK-NEXT: .excp_en: 0
5962
; CHECK-NEXT: .float_mode: 0xc0
60-
; CHECK-NEXT: .ieee_mode: false
63+
; GFX11-NEXT: .ieee_mode: false
6164
; CHECK-NEXT: .image_op: false
6265
; CHECK-NEXT: .lds_size: 0
6366
; CHECK-NEXT: .mem_ordered: true
@@ -112,7 +115,7 @@
112115
; CHECK-NEXT: .debug_mode: false
113116
; CHECK-NEXT: .entry_point: _amdgpu_gs
114117
; CHECK-NEXT: .entry_point_symbol: gs_shader
115-
; CHECK-NEXT: .ieee_mode: false
118+
; GFX11-NEXT: .ieee_mode: false
116119
; CHECK-NEXT: .lds_size: 0x200
117120
; CHECK-NEXT: .mem_ordered: true
118121
; CHECK-NEXT: .scratch_en: false
@@ -124,7 +127,7 @@
124127
; CHECK-NEXT: .debug_mode: false
125128
; CHECK-NEXT: .entry_point: _amdgpu_hs
126129
; CHECK-NEXT: .entry_point_symbol: hs_shader
127-
; CHECK-NEXT: .ieee_mode: false
130+
; GFX11-NEXT: .ieee_mode: false
128131
; CHECK-NEXT: .lds_size: 0x1000
129132
; CHECK-NEXT: .mem_ordered: true
130133
; CHECK-NEXT: .scratch_en: false
@@ -136,7 +139,7 @@
136139
; CHECK-NEXT: .debug_mode: false
137140
; CHECK-NEXT: .entry_point: _amdgpu_ps
138141
; CHECK-NEXT: .entry_point_symbol: ps_shader
139-
; CHECK-NEXT: .ieee_mode: false
142+
; GFX11-NEXT: .ieee_mode: false
140143
; CHECK-NEXT: .lds_size: 0
141144
; CHECK-NEXT: .mem_ordered: true
142145
; CHECK-NEXT: .scratch_en: false

0 commit comments

Comments
 (0)