Skip to content

Commit 1a32613

Browse files
authored
[AMDGPU] Update pal metadata for v3.6 and fix v3.0 (llvm#135196)
Update entry_point for all PAL versions below 3.6. PAL versions 3.6 and above remove entry_point.
1 parent b8b35b9 commit 1a32613

File tree

13 files changed

+237
-30
lines changed

13 files changed

+237
-30
lines changed

llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/MC/MCExpr.h"
2424
#include "llvm/Support/AMDGPUMetadata.h"
2525
#include "llvm/Support/EndianStream.h"
26+
#include "llvm/Support/VersionTuple.h"
2627

2728
using namespace llvm;
2829
using namespace llvm::AMDGPU;
@@ -259,13 +260,16 @@ void AMDGPUPALMetadata::setEntryPoint(unsigned CC, StringRef Name) {
259260
getHwStage(CC)[".entry_point_symbol"] =
260261
MsgPackDoc.getNode(Name, /*Copy=*/true);
261262

262-
// Set .entry_point which is defined
263-
// to be _amdgpu_<stage> and _amdgpu_cs for non-shader functions
264-
SmallString<16> EPName("_amdgpu_");
265-
raw_svector_ostream EPNameOS(EPName);
266-
EPNameOS << getStageName(CC) + 1;
267-
getHwStage(CC)[".entry_point"] =
268-
MsgPackDoc.getNode(EPNameOS.str(), /*Copy=*/true);
263+
// For PAL version 3.6 and above, entry_point is no longer required.
264+
if (getPALVersion() < VersionTuple(3, 6)) {
265+
// Set .entry_point which is defined to be _amdgpu_<stage>_main and
266+
// _amdgpu_cs_main for non-shader functions.
267+
SmallString<16> EPName("_amdgpu_");
268+
raw_svector_ostream EPNameOS(EPName);
269+
EPNameOS << getStageName(CC) + 1 << "_main";
270+
getHwStage(CC)[".entry_point"] =
271+
MsgPackDoc.getNode(EPNameOS.str(), /*Copy=*/true);
272+
}
269273
}
270274

271275
// Set the number of used vgprs in the metadata. This is an optional
@@ -1052,6 +1056,10 @@ unsigned AMDGPUPALMetadata::getPALMajorVersion() { return getPALVersion(0); }
10521056

10531057
unsigned AMDGPUPALMetadata::getPALMinorVersion() { return getPALVersion(1); }
10541058

1059+
// Return the PAL metadata version as a single VersionTuple. The major and
// minor components come from getPALVersion(0) and getPALVersion(1), the same
// values returned by getPALMajorVersion() and getPALMinorVersion(), so callers
// can compare against a whole version (e.g. VersionTuple(3, 6)) in one step.
VersionTuple AMDGPUPALMetadata::getPALVersion() {
1060+
return VersionTuple(getPALVersion(0), getPALVersion(1));
1061+
}
1062+
10551063
// Set the field in a given .hardware_stages entry
10561064
void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, unsigned Val) {
10571065
getHwStage(CC)[field] = Val;

llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
namespace llvm {
2222

2323
class Module;
24-
class StringRef;
2524

2625
class AMDGPUPALMetadata {
2726
public:
@@ -155,6 +154,7 @@ class AMDGPUPALMetadata {
155154

156155
unsigned getPALMajorVersion();
157156
unsigned getPALMinorVersion();
157+
VersionTuple getPALVersion();
158158

159159
void setHwStage(unsigned CC, StringRef field, unsigned Val);
160160
void setHwStage(unsigned CC, StringRef field, bool Val);

llvm/test/CodeGen/AMDGPU/amdpal-cs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
; GCN-NEXT: amdpal.pipelines:
99
; GCN-NEXT: - .hardware_stages:
1010
; GCN-NEXT: .cs:
11-
; GCN-NEXT: .entry_point: _amdgpu_cs
11+
; GCN-NEXT: .entry_point: _amdgpu_cs_main
1212
; GCN-NEXT: .entry_point_symbol: cs_amdpal
1313
; GCN-NEXT: .scratch_memory_size: 0
1414
; GCN: .registers:

llvm/test/CodeGen/AMDGPU/amdpal-es.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
; GCN-NEXT: amdpal.pipelines:
88
; GCN-NEXT: - .hardware_stages:
99
; GCN-NEXT: .es:
10-
; GCN-NEXT: .entry_point: _amdgpu_es
10+
; GCN-NEXT: .entry_point: _amdgpu_es_main
1111
; GCN-NEXT: .entry_point_symbol: es_amdpal
1212
; GCN-NEXT: .scratch_memory_size: 0
1313
; GCN: .registers:

llvm/test/CodeGen/AMDGPU/amdpal-gs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
; GCN-NEXT: amdpal.pipelines:
99
; GCN-NEXT: - .hardware_stages:
1010
; GCN-NEXT: .gs:
11-
; GCN-NEXT: .entry_point: _amdgpu_gs
11+
; GCN-NEXT: .entry_point: _amdgpu_gs_main
1212
; GCN-NEXT: .entry_point_symbol: gs_amdpal
1313
; GCN-NEXT: .scratch_memory_size: 0
1414
; GCN: .registers:

llvm/test/CodeGen/AMDGPU/amdpal-ls.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
; GCN-NEXT: amdpal.pipelines:
88
; GCN-NEXT: - .hardware_stages:
99
; GCN-NEXT: .ls:
10-
; GCN-NEXT: .entry_point: _amdgpu_ls
10+
; GCN-NEXT: .entry_point: _amdgpu_ls_main
1111
; GCN-NEXT: .entry_point_symbol: ls_amdpal
1212
; GCN-NEXT: .scratch_memory_size: 0
1313
; GCN: .registers:

llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
; GCN-NEXT: amdpal.pipelines:
1212
; GCN-NEXT: - .hardware_stages:
1313
; GCN-NEXT: .ps:
14-
; GCN-NEXT: .entry_point: _amdgpu_ps
14+
; GCN-NEXT: .entry_point: _amdgpu_ps_main
1515
; GCN-NEXT: .entry_point_symbol: amdpal_psenable
1616
; GCN-NEXT: .scratch_memory_size: 0
1717
; GCN: .registers:

llvm/test/CodeGen/AMDGPU/amdpal-vs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
; GCN-NEXT: amdpal.pipelines:
99
; GCN-NEXT: - .hardware_stages:
1010
; GCN-NEXT: .vs:
11-
; GCN-NEXT: .entry_point: _amdgpu_vs
11+
; GCN-NEXT: .entry_point: _amdgpu_vs_main
1212
; GCN-NEXT: .entry_point_symbol: vs_amdpal
1313
; GCN-NEXT: .scratch_memory_size: 0
1414
; GCN: .registers:

llvm/test/CodeGen/AMDGPU/amdpal.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32,
8686
; PAL-NEXT: amdpal.pipelines:
8787
; PAL-NEXT: - .hardware_stages:
8888
; PAL-NEXT: .cs:
89-
; PAL-NEXT: .entry_point: _amdgpu_cs
89+
; PAL-NEXT: .entry_point: _amdgpu_cs_main
9090
; PAL-NEXT: .entry_point_symbol: scratch2_cs
9191
; PAL-NEXT: .scratch_memory_size: 0x10
9292
; PAL-NEXT: .sgpr_count: 0x

llvm/test/CodeGen/AMDGPU/elf-notes.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
; OSABI-PAL-ELF: amdpal.pipelines:
6767
; OSABI-PAL-ELF: - .hardware_stages:
6868
; OSABI-PAL-ELF: .cs:
69-
; OSABI-PAL-ELF: .entry_point: _amdgpu_cs
69+
; OSABI-PAL-ELF: .entry_point: _amdgpu_cs_main
7070
; OSABI-PAL-ELF: .entry_point_symbol: elf_notes
7171
; OSABI-PAL-ELF: .scratch_memory_size: 0
7272
; OSABI-PAL-ELF: .sgpr_count: 96

llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
; CHECK-NEXT: .checksum_value: 0x9444d7d0
5959
; CHECK-NEXT: .debug_mode: false
6060
; DVGPR-NEXT: .dynamic_vgpr_saved_count: 0x70
61-
; CHECK-NEXT: .entry_point: _amdgpu_cs
61+
; CHECK-NEXT: .entry_point: _amdgpu_cs_main
6262
; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main
6363
; CHECK-NEXT: .excp_en: 0
6464
; CHECK-NEXT: .float_mode: 0xc0
@@ -116,7 +116,7 @@
116116
; CHECK-NEXT: .wgp_mode: false
117117
; CHECK-NEXT: .gs:
118118
; CHECK-NEXT: .debug_mode: false
119-
; CHECK-NEXT: .entry_point: _amdgpu_gs
119+
; CHECK-NEXT: .entry_point: _amdgpu_gs_main
120120
; CHECK-NEXT: .entry_point_symbol: gs_shader
121121
; GFX11-NEXT: .ieee_mode: false
122122
; CHECK-NEXT: .lds_size: 0x200
@@ -128,7 +128,7 @@
128128
; CHECK-NEXT: .wgp_mode: true
129129
; CHECK-NEXT: .hs:
130130
; CHECK-NEXT: .debug_mode: false
131-
; CHECK-NEXT: .entry_point: _amdgpu_hs
131+
; CHECK-NEXT: .entry_point: _amdgpu_hs_main
132132
; CHECK-NEXT: .entry_point_symbol: hs_shader
133133
; GFX11-NEXT: .ieee_mode: false
134134
; CHECK-NEXT: .lds_size: 0x1000
@@ -140,7 +140,7 @@
140140
; CHECK-NEXT: .wgp_mode: true
141141
; CHECK-NEXT: .ps:
142142
; CHECK-NEXT: .debug_mode: false
143-
; CHECK-NEXT: .entry_point: _amdgpu_ps
143+
; CHECK-NEXT: .entry_point: _amdgpu_ps_main
144144
; CHECK-NEXT: .entry_point_symbol: ps_shader
145145
; GFX11-NEXT: .ieee_mode: false
146146
; CHECK-NEXT: .lds_size: 0
@@ -180,32 +180,27 @@ define dllexport amdgpu_ps void @ps_shader() #1 {
180180

181181
@LDS.GS = external addrspace(3) global [1 x i32], align 4
182182

183-
define dllexport amdgpu_gs void @gs_shader() #2 {
183+
define dllexport amdgpu_gs void @gs_shader() {
184184
%ptr = getelementptr i32, ptr addrspace(3) @LDS.GS, i32 0
185185
store i32 0, ptr addrspace(3) %ptr, align 4
186186
ret void
187187
}
188188

189189
@LDS.HS = external addrspace(3) global [1024 x i32], align 4
190190

191-
define dllexport amdgpu_hs void @hs_shader() #2 {
191+
define dllexport amdgpu_hs void @hs_shader() {
192192
%ptr = getelementptr i32, ptr addrspace(3) @LDS.HS, i32 0
193193
store i32 0, ptr addrspace(3) %ptr, align 4
194194
ret void
195195
}
196196

197197
!amdgpu.pal.metadata.msgpack = !{!0}
198198

199-
; Function Attrs: nounwind willreturn memory(none)
200199
declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) #1
201-
202-
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
203-
declare i64 @llvm.amdgcn.s.getpc() #2
204-
205-
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
200+
declare i64 @llvm.amdgcn.s.getpc()
206201
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg) #3
207202

208-
attributes #0 = { nounwind memory(readwrite) "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="4" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode" }
203+
attributes #0 = { nounwind memory(readwrite) "target-features"=",+wavefrontsize64,+cumode" }
209204

210205
attributes #1 = { nounwind memory(readwrite) "InitialPSInputAddr"="36983" }
211206

0 commit comments

Comments
 (0)