Skip to content

Commit 0a21ef9

Browse files
authored
[AMDGPU] Add SubtargetFeature for dynamic VGPR mode (#130030)
This represents a hardware mode supported only for wave32 compute shaders. When enabled, we set the `.dynamic_vgpr_en` field of `.compute_registers` to true in the PAL metadata. This will be changed to use an attribute after downstream consumers have been migrated.
1 parent 5865807 commit 0a21ef9

File tree

6 files changed

+41
-12
lines changed

6 files changed

+41
-12
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,12 @@ For example:
758758
enabled will execute correctly but may be less
759759
performant than code generated for XNACK replay
760760
disabled.
761+
762+
dynamic-vgpr TODO Represents the "Dynamic VGPR" hardware mode, introduced in GFX12.
763+
Waves launched in this mode may allocate or deallocate the VGPRs
764+
using dedicated instructions, but may not send the DEALLOC_VGPRS
765+
message.
766+
761767
=============== ============================ ==================================================
762768

763769
.. _amdgpu-target-id:

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1251,6 +1251,12 @@ def FeatureXF32Insts : SubtargetFeature<"xf32-insts",
12511251
"v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32"
12521252
>;
12531253

1254+
// Subtarget feature "dynamic-vgpr": sets GCNSubtarget::DynamicVGPR to true.
// The AsmPrinter queries it (via isDynamicVGPREnabled()) to emit
// `.dynamic_vgpr_en: true` under `.compute_registers` in the PAL metadata.
def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",
1255+
"DynamicVGPR",
1256+
"true",
1257+
"Enable dynamic VGPR mode"
1258+
>;
1259+
12541260
// Dummy feature used to disable assembler instructions.
12551261
def FeatureDisable : SubtargetFeature<"",
12561262
"FeatureDisable","true",

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1415,6 +1415,9 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
14151415
MD->setHwStage(CC, ".trap_present",
14161416
(bool)CurrentProgramInfo.TrapHandlerEnable);
14171417
MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
1418+
1419+
if (ST.isDynamicVGPREnabled())
1420+
MD->setComputeRegisters(".dynamic_vgpr_en", true);
14181421
}
14191422

14201423
MD->setHwStage(CC, ".lds_size",

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
191191
/// indicates a lack of S_CLAUSE support.
192192
unsigned MaxHardClauseLength = 0;
193193
bool SupportsSRAMECC = false;
194+
bool DynamicVGPR = false;
194195

195196
// This should not be used directly. 'TargetID' tracks the dynamic settings
196197
// for SRAMECC.
@@ -1653,6 +1654,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
16531654
return true;
16541655
}
16551656

1657+
/// \returns true if the "dynamic-vgpr" subtarget feature is enabled, i.e. the
/// DynamicVGPR flag is set; the flag defaults to false.
bool isDynamicVGPREnabled() const { return DynamicVGPR; }
1658+
16561659
bool requiresDisjointEarlyClobberAndUndef() const override {
16571660
// AMDGPU doesn't care if early-clobber and undef operands are allocated
16581661
// to the same register.

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
1+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,GFX11 %s
2+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,GFX12 %s
3+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,GFX12,DVGPR %s
24

35
; CHECK: .amdgpu_pal_metadata
46
; CHECK-NEXT: ---
57
; CHECK-NEXT: amdpal.pipelines:
68
; CHECK-NEXT: - .api: Vulkan
79
; CHECK-NEXT: .compute_registers:
10+
; DVGPR-NEXT: .dynamic_vgpr_en: true
811
; CHECK-NEXT: .tg_size_en: true
912
; CHECK-NEXT: .tgid_x_en: false
1013
; CHECK-NEXT: .tgid_y_en: false
@@ -16,7 +19,7 @@
1619
; CHECK-NEXT: .debug_mode: 0
1720
; CHECK-NEXT: .excp_en: 0
1821
; CHECK-NEXT: .float_mode: 0xc0
19-
; CHECK-NEXT: .ieee_mode: true
22+
; GFX11-NEXT: .ieee_mode: true
2023
; CHECK-NEXT: .image_op: false
2124
; CHECK-NEXT: .lds_size: 0x200
2225
; CHECK-NEXT: .mem_ordered: true
@@ -98,19 +101,22 @@
98101
; CHECK-NEXT: no_stack_extern_call:
99102
; CHECK-NEXT: .backend_stack_size: 0x10
100103
; CHECK-NEXT: .lds_size: 0
101-
; CHECK-NEXT: .sgpr_count: 0x29
104+
; GFX11-NEXT: .sgpr_count: 0x29
105+
; GFX12-NEXT: .sgpr_count: 0x24
102106
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
103107
; CHECK-NEXT: .vgpr_count: 0x58
104108
; CHECK-NEXT: no_stack_extern_call_many_args:
105109
; CHECK-NEXT: .backend_stack_size: 0x90
106110
; CHECK-NEXT: .lds_size: 0
107-
; CHECK-NEXT: .sgpr_count: 0x29
111+
; GFX11-NEXT: .sgpr_count: 0x29
112+
; GFX12-NEXT: .sgpr_count: 0x24
108113
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x90
109114
; CHECK-NEXT: .vgpr_count: 0x58
110115
; CHECK-NEXT: no_stack_indirect_call:
111116
; CHECK-NEXT: .backend_stack_size: 0x10
112117
; CHECK-NEXT: .lds_size: 0
113-
; CHECK-NEXT: .sgpr_count: 0x29
118+
; GFX11-NEXT: .sgpr_count: 0x29
119+
; GFX12-NEXT: .sgpr_count: 0x24
114120
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
115121
; CHECK-NEXT: .vgpr_count: 0x58
116122
; CHECK-NEXT: simple_lds:
@@ -140,13 +146,15 @@
140146
; CHECK-NEXT: simple_stack_extern_call:
141147
; CHECK-NEXT: .backend_stack_size: 0x20
142148
; CHECK-NEXT: .lds_size: 0
143-
; CHECK-NEXT: .sgpr_count: 0x29
149+
; GFX11-NEXT: .sgpr_count: 0x29
150+
; GFX12-NEXT: .sgpr_count: 0x24
144151
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
145152
; CHECK-NEXT: .vgpr_count: 0x58
146153
; CHECK-NEXT: simple_stack_indirect_call:
147154
; CHECK-NEXT: .backend_stack_size: 0x20
148155
; CHECK-NEXT: .lds_size: 0
149-
; CHECK-NEXT: .sgpr_count: 0x29
156+
; GFX11-NEXT: .sgpr_count: 0x29
157+
; GFX12-NEXT: .sgpr_count: 0x24
150158
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
151159
; CHECK-NEXT: .vgpr_count: 0x58
152160
; CHECK-NEXT: simple_stack_recurse:

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s
1+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11
2+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK
3+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr <%s | FileCheck %s --check-prefixes=CHECK,DVGPR
24

35
; CHECK-LABEL: {{^}}_amdgpu_cs_main:
46
; CHECK: ; TotalNumSgprs: 4
@@ -8,6 +10,7 @@
810
; CHECK-NEXT: amdpal.pipelines:
911
; CHECK-NEXT: - .api: Vulkan
1012
; CHECK-NEXT: .compute_registers:
13+
; DVGPR-NEXT: .dynamic_vgpr_en: true
1114
; CHECK-NEXT: .tg_size_en: true
1215
; CHECK-NEXT: .tgid_x_en: false
1316
; CHECK-NEXT: .tgid_y_en: false
@@ -57,7 +60,7 @@
5760
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
5861
; CHECK-NEXT: .excp_en: 0
5962
; CHECK-NEXT: .float_mode: 0xc0
60-
; CHECK-NEXT: .ieee_mode: false
63+
; GFX11-NEXT: .ieee_mode: false
6164
; CHECK-NEXT: .image_op: false
6265
; CHECK-NEXT: .lds_size: 0
6366
; CHECK-NEXT: .mem_ordered: true
@@ -112,7 +115,7 @@
112115
; CHECK-NEXT: .debug_mode: false
113116
; CHECK-NEXT: .entry_point: _amdgpu_gs
114117
; CHECK-NEXT: .entry_point_symbol: gs_shader
115-
; CHECK-NEXT: .ieee_mode: false
118+
; GFX11-NEXT: .ieee_mode: false
116119
; CHECK-NEXT: .lds_size: 0x200
117120
; CHECK-NEXT: .mem_ordered: true
118121
; CHECK-NEXT: .scratch_en: false
@@ -124,7 +127,7 @@
124127
; CHECK-NEXT: .debug_mode: false
125128
; CHECK-NEXT: .entry_point: _amdgpu_hs
126129
; CHECK-NEXT: .entry_point_symbol: hs_shader
127-
; CHECK-NEXT: .ieee_mode: false
130+
; GFX11-NEXT: .ieee_mode: false
128131
; CHECK-NEXT: .lds_size: 0x1000
129132
; CHECK-NEXT: .mem_ordered: true
130133
; CHECK-NEXT: .scratch_en: false
@@ -136,7 +139,7 @@
136139
; CHECK-NEXT: .debug_mode: false
137140
; CHECK-NEXT: .entry_point: _amdgpu_ps
138141
; CHECK-NEXT: .entry_point_symbol: ps_shader
139-
; CHECK-NEXT: .ieee_mode: false
142+
; GFX11-NEXT: .ieee_mode: false
140143
; CHECK-NEXT: .lds_size: 0
141144
; CHECK-NEXT: .mem_ordered: true
142145
; CHECK-NEXT: .scratch_en: false

0 commit comments

Comments
 (0)