Skip to content

Commit 7ca3f36

Browse files
committed
[AMDGPU] Add SubtargetFeature for dynamic VGPR mode llvm#130030
1 parent cf7e4bf commit 7ca3f36

File tree

5 files changed

+26
-5
lines changed

5 files changed

+26
-5
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,12 @@ For example:
772772
enabled will execute correctly but may be less
773773
performant than code generated for XNACK replay
774774
disabled.
775+
776+
dynamic-vgpr TODO Represents the "Dynamic VGPR" hardware mode, introduced in GFX12.
777+
Waves launched in this mode may allocate or deallocate the VGPRs
778+
using dedicated instructions, but may not send the DEALLOC_VGPRS
779+
message.
780+
775781
=============== ============================ ==================================================
776782

777783
.. _amdgpu-target-id:

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,6 +1245,12 @@ def FeatureXF32Insts : SubtargetFeature<"xf32-insts",
12451245
"v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32"
12461246
>;
12471247

1248+
// Subtarget feature selected with the "dynamic-vgpr" attribute string.
// Enabling it sets the subtarget's DynamicVGPR field to "true".
def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",
1249+
"DynamicVGPR",
1250+
"true",
1251+
"Enable dynamic VGPR mode"
1252+
>;
1253+
12481254
// Enable the use of SCRATCH_STORE/LOAD_BLOCK instructions for saving and
12491255
// restoring the callee-saved registers.
12501256
def FeatureUseBlockVGPROpsForCSR : SubtargetFeature<"block-vgpr-csr",

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1417,6 +1417,9 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
14171417
MD->setHwStage(CC, ".trap_present",
14181418
(bool)CurrentProgramInfo.TrapHandlerEnable);
14191419
MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
1420+
1421+
if (ST.isDynamicVGPREnabled())
1422+
MD->setComputeRegisters(".dynamic_vgpr_en", true);
14201423
}
14211424

14221425
MD->setHwStage(CC, ".lds_size",

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
190190
/// indicates a lack of S_CLAUSE support.
191191
unsigned MaxHardClauseLength = 0;
192192
bool SupportsSRAMECC = false;
193+
bool DynamicVGPR = false;
193194

194195
// This should not be used directly. 'TargetID' tracks the dynamic settings
195196
// for SRAMECC.
@@ -1660,6 +1661,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
16601661
return true;
16611662
}
16621663

1664+
/// \returns the value of the DynamicVGPR subtarget flag.
bool isDynamicVGPREnabled() const { return DynamicVGPR; }
1665+
16631666
bool requiresDisjointEarlyClobberAndUndef() const override {
16641667
// AMDGPU doesn't care if early-clobber and undef operands are allocated
16651668
// to the same register.

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s
1+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s --check-prefixes=CHECK,GFX11
2+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 <%s | FileCheck %s --check-prefixes=CHECK
3+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr <%s | FileCheck %s --check-prefixes=CHECK,DVGPR
24

35
; CHECK-LABEL: {{^}}_amdgpu_cs_main:
46
; CHECK: ; TotalNumSgprs: 4
@@ -8,6 +10,7 @@
810
; CHECK-NEXT: amdpal.pipelines:
911
; CHECK-NEXT: - .api: Vulkan
1012
; CHECK-NEXT: .compute_registers:
13+
; DVGPR-NEXT: .dynamic_vgpr_en: true
1114
; CHECK-NEXT: .tg_size_en: true
1215
; CHECK-NEXT: .tgid_x_en: false
1316
; CHECK-NEXT: .tgid_y_en: false
@@ -57,7 +60,7 @@
5760
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
5861
; CHECK-NEXT: .excp_en: 0
5962
; CHECK-NEXT: .float_mode: 0xc0
60-
; CHECK-NEXT: .ieee_mode: false
63+
; GFX11-NEXT: .ieee_mode: false
6164
; CHECK-NEXT: .image_op: false
6265
; CHECK-NEXT: .lds_size: 0
6366
; CHECK-NEXT: .mem_ordered: true
@@ -112,7 +115,7 @@
112115
; CHECK-NEXT: .debug_mode: false
113116
; CHECK-NEXT: .entry_point: _amdgpu_gs
114117
; CHECK-NEXT: .entry_point_symbol: gs_shader
115-
; CHECK-NEXT: .ieee_mode: false
118+
; GFX11-NEXT: .ieee_mode: false
116119
; CHECK-NEXT: .lds_size: 0x200
117120
; CHECK-NEXT: .mem_ordered: true
118121
; CHECK-NEXT: .scratch_en: false
@@ -124,7 +127,7 @@
124127
; CHECK-NEXT: .debug_mode: false
125128
; CHECK-NEXT: .entry_point: _amdgpu_hs
126129
; CHECK-NEXT: .entry_point_symbol: hs_shader
127-
; CHECK-NEXT: .ieee_mode: false
130+
; GFX11-NEXT: .ieee_mode: false
128131
; CHECK-NEXT: .lds_size: 0x1000
129132
; CHECK-NEXT: .mem_ordered: true
130133
; CHECK-NEXT: .scratch_en: false
@@ -136,7 +139,7 @@
136139
; CHECK-NEXT: .debug_mode: false
137140
; CHECK-NEXT: .entry_point: _amdgpu_ps
138141
; CHECK-NEXT: .entry_point_symbol: ps_shader
139-
; CHECK-NEXT: .ieee_mode: false
142+
; GFX11-NEXT: .ieee_mode: false
140143
; CHECK-NEXT: .lds_size: 0
141144
; CHECK-NEXT: .mem_ordered: true
142145
; CHECK-NEXT: .scratch_en: false

0 commit comments

Comments
 (0)