Skip to content

Commit b5a034b

Browse files
committed
[AMDGPU] Introduce Code Object V6
Introduce Code Object V6 in Clang, LLD, Flang and LLVM. This is the same as V5 except a new "generic version" flag can be present in EFLAGS. This is related to new generic targets that'll be added in a follow-up patch. It's also likely V6 will have new changes (possibly new metadata entries) added later. Docs change are not included, I'm planning to do them in a follow-up patch all at once (when generic targets land too).
1 parent ba1e84f commit b5a034b

File tree

52 files changed

+483
-135
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+483
-135
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4783,9 +4783,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
47834783
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
47844784
HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
47854785
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
4786-
Values<"none,4,5">,
4786+
Values<"none,4,5,6">,
47874787
NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
4788-
NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
4788+
NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>,
47894789
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
47904790

47914791
defm cumode : SimpleMFlag<"cumode",

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17721,9 +17721,9 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
1772117721
// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
1772217722
/// Emit code based on Code Object ABI version.
1772317723
/// COV_4 : Emit code to use dispatch ptr
17724-
/// COV_5 : Emit code to use implicitarg ptr
17724+
/// COV_5+ : Emit code to use implicitarg ptr
1772517725
/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
17726-
/// and use its value for COV_4 or COV_5 approach. It is used for
17726+
/// and use its value for COV_4 or COV_5+ approach. It is used for
1772717727
/// compiling device libraries in an ABI-agnostic way.
1772817728
///
1772917729
/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by
@@ -17766,7 +17766,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
1776617766
Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
1776717767
} else {
1776817768
Value *GEP = nullptr;
17769-
if (Cov == CodeObjectVersionKind::COV_5) {
17769+
if (Cov >= CodeObjectVersionKind::COV_5) {
1777017770
// Indexing the implicit kernarg segment.
1777117771
GEP = CGF.Builder.CreateConstGEP1_32(
1777217772
CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2600,7 +2600,7 @@ getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
26002600
void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
26012601
const llvm::opt::ArgList &Args) {
26022602
const unsigned MinCodeObjVer = 4;
2603-
const unsigned MaxCodeObjVer = 5;
2603+
const unsigned MaxCodeObjVer = 6;
26042604

26052605
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
26062606
if (CodeObjArg->getOption().getID() ==

clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
55
// RUN: -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s
66

7+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
8+
// RUN: -mcode-object-version=6 -DUSER -x hip -o %t_6.bc %s
9+
710
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
811
// RUN: -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s
912

@@ -15,6 +18,10 @@
1518
// RUN: %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\
1619
// RUN: FileCheck -check-prefix=LINKED5 %s
1720

21+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
22+
// RUN: %t_6.bc -mlink-builtin-bitcode %t_0.bc -o - |\
23+
// RUN: FileCheck -check-prefix=LINKED6 %s
24+
1825
#include "Inputs/cuda.h"
1926

2027
// LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
@@ -77,6 +84,36 @@
7784
// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
7885
// LINKED5: "amdgpu_code_object_version", i32 500
7986

87+
// LINKED6: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
88+
// LINKED6-LABEL: bar
89+
// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
90+
// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
91+
// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
92+
// LINKED6: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
93+
// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
94+
// LINKED6: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
95+
// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
96+
// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
97+
98+
// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
99+
// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
100+
// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
101+
// LINKED6: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
102+
// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
103+
// LINKED6: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
104+
// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
105+
// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
106+
107+
// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
108+
// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
109+
// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
110+
// LINKED6: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
111+
// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
112+
// LINKED6: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
113+
// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
114+
// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
115+
// LINKED6: "amdgpu_code_object_version", i32 600
116+
80117
#ifdef DEVICELIB
81118
__device__ void bar(int *x, int *y, int *z)
82119
{

clang/test/CodeGenCUDA/amdgpu-code-object-version.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
1010
// RUN: -mcode-object-version=5 -o - %s | FileCheck -check-prefix=V5 %s
1111

12+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
13+
// RUN: -mcode-object-version=6 -o - %s | FileCheck -check-prefix=V6 %s
14+
1215
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
1316
// RUN: -mcode-object-version=none -o - %s | FileCheck %s -check-prefix=NONE
1417

@@ -17,5 +20,6 @@
1720

1821
// V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400}
1922
// V5: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 500}
23+
// V6: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 600}
2024
// NONE-NOT: !{{.*}} = !{i32 1, !"amdgpu_code_object_version",
2125
// INV: error: invalid value '4.1' in '-mcode-object-version=4.1'

clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
88
// RUN: | FileCheck -check-prefix=COV5 %s
99

10+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
11+
// RUN: -fcuda-is-device -mcode-object-version=6 -emit-llvm -o - -x hip %s \
12+
// RUN: | FileCheck -check-prefix=COV5 %s
13+
1014
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
1115
// RUN: -fcuda-is-device -mcode-object-version=none -emit-llvm -o - -x hip %s \
1216
// RUN: | FileCheck -check-prefix=COVNONE %s

clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc

Whitespace-only changes.

clang/test/Driver/hip-code-object-version.hip

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,18 @@
2323
// V5: "-mllvm" "--amdhsa-code-object-version=5"
2424
// V5: "-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx906"
2525

26+
// Check bundle ID for code object version 6.
27+
28+
// RUN: not %clang -### --target=x86_64-linux-gnu \
29+
// RUN: -mcode-object-version=6 \
30+
// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
31+
// RUN: %s 2>&1 | FileCheck -check-prefix=V6 %s
32+
33+
// V6: "-mcode-object-version=6"
34+
// V6: "-mllvm" "--amdhsa-code-object-version=6"
35+
// V6: "-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx906"
36+
37+
2638
// Check bundle ID for code object version default
2739

2840
// RUN: %clang -### --target=x86_64-linux-gnu \

clang/test/Driver/hip-device-libs.hip

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,13 +187,26 @@
187187
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
188188
// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
189189

190-
// Test -mcode-object-version=5 with old device library without abi_version_400.bc
190+
// Test -mcode-object-version=5 with old device library without abi_version_500.bc
191191
// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
192192
// RUN: -mcode-object-version=5 \
193193
// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \
194194
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
195195
// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
196196

197+
// Test -mcode-object-version=6
198+
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
199+
// RUN: -mcode-object-version=6 \
200+
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
201+
// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI6
202+
203+
// Test -mcode-object-version=6 with old device library without abi_version_600.bc
204+
// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
205+
// RUN: -mcode-object-version=6 \
206+
// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \
207+
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
208+
// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI6
209+
197210
// ALL-NOT: error:
198211
// ALL: {{"[^"]*clang[^"]*"}}
199212

@@ -237,7 +250,10 @@
237250
// ABI4: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_400.bc"
238251
// ABI5-NOT: error:
239252
// ABI5: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_500.bc"
253+
// ABI6-NOT: error:
254+
// ABI6: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_600.bc"
240255
// NOABI4-NOT: error:
241256
// NOABI4-NOT: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_400.bc"
242257
// NOABI4-NOT: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_500.bc"
243258
// NOABI5: error: cannot find ROCm device libraryfor ABI version 5; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library
259+
// NOABI6: error: cannot find ROCm device libraryfor ABI version 6; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library

flang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,8 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
284284
if (const llvm::opt::Arg *a = args.getLastArg(
285285
clang::driver::options::OPT_mcode_object_version_EQ)) {
286286
llvm::StringRef s = a->getValue();
287+
if (s == "6")
288+
opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_6;
287289
if (s == "5")
288290
opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_5;
289291
if (s == "4")

flang/test/Lower/AMD/code-object-version.f90

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=none %s -o - | FileCheck --check-prefix=COV_NONE %s
44
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=4 %s -o - | FileCheck --check-prefix=COV_4 %s
55
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=5 %s -o - | FileCheck --check-prefix=COV_5 %s
6+
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=6 %s -o - | FileCheck --check-prefix=COV_6 %s
67

78
!COV_DEFAULT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
89
!COV_NONE-NOT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
910
!COV_4: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
1011
!COV_5: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32
12+
!COV_6: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(600 : i32) {addr_space = 4 : i32} : i32
1113
subroutine target_simple
1214
end subroutine target_simple
13-

lld/ELF/Arch/AMDGPU.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ class AMDGPU final : public TargetInfo {
2525
private:
2626
uint32_t calcEFlagsV3() const;
2727
uint32_t calcEFlagsV4() const;
28+
uint32_t calcEFlagsV6() const;
2829

2930
public:
3031
AMDGPU();
@@ -106,6 +107,24 @@ uint32_t AMDGPU::calcEFlagsV4() const {
106107
return retMach | retXnack | retSramEcc;
107108
}
108109

110+
uint32_t AMDGPU::calcEFlagsV6() const {
111+
uint32_t flags = calcEFlagsV4();
112+
113+
uint32_t genericVersion =
114+
getEFlags(ctx.objectFiles[0]) & EF_AMDGPU_GENERIC_VERSION;
115+
116+
// Verify that all input files have compatible generic version.
117+
for (InputFile *f : ArrayRef(ctx.objectFiles).slice(1)) {
118+
if (genericVersion != (getEFlags(f) & EF_AMDGPU_GENERIC_VERSION)) {
119+
error("incompatible generic version: " + toString(f));
120+
return 0;
121+
}
122+
}
123+
124+
flags |= genericVersion;
125+
return flags;
126+
}
127+
109128
uint32_t AMDGPU::calcEFlags() const {
110129
if (ctx.objectFiles.empty())
111130
return 0;
@@ -121,6 +140,8 @@ uint32_t AMDGPU::calcEFlags() const {
121140
case ELFABIVERSION_AMDGPU_HSA_V4:
122141
case ELFABIVERSION_AMDGPU_HSA_V5:
123142
return calcEFlagsV4();
143+
case ELFABIVERSION_AMDGPU_HSA_V6:
144+
return calcEFlagsV6();
124145
default:
125146
error("unknown abi version: " + Twine(abiVersion));
126147
return 0;

lld/test/ELF/amdgpu-tid.s

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,19 @@
4343
# SRAMECC-OFF: EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 (0x800)
4444
# SRAMECC-ON: EF_AMDGPU_FEATURE_SRAMECC_ON_V4 (0xC00)
4545
# SRAMECC-INCOMPATIBLE: incompatible sramecc:
46+
47+
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -filetype=obj %s -o %t-genericv1_0.o
48+
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -filetype=obj %s -o %t-genericv1_1.o
49+
# RUN: ld.lld -shared %t-genericv1_0.o %t-genericv1_1.o -o %t-genericv1_2.so
50+
# RUN: llvm-readobj --file-headers %t-genericv1_2.so | FileCheck --check-prefix=GENERICV1 %s
51+
52+
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=2 -filetype=obj %s -o %t-genericv2_0.o
53+
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=2 -filetype=obj %s -o %t-genericv2_1.o
54+
# RUN: ld.lld -shared %t-genericv2_0.o %t-genericv2_1.o -o %t-genericv2_2.so
55+
# RUN: llvm-readobj --file-headers %t-genericv2_2.so | FileCheck --check-prefix=GENERICV2 %s
56+
57+
# RUN: not ld.lld -shared %t-genericv1_0.o %t-genericv2_0.o -o /dev/null 2>&1 | FileCheck --check-prefix=GENERIC-INCOMPATIBLE %s
58+
59+
# GENERICV1: EF_AMDGPU_GENERIC_VERSION_V1 (0x1000000)
60+
# GENERICV2: EF_AMDGPU_GENERIC_VERSION_V2 (0x2000000)
61+
# GENERIC-INCOMPATIBLE: incompatible generic version

llvm/include/llvm/BinaryFormat/ELF.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,8 @@ enum {
375375
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
376376
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
377377
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
378-
ELFABIVERSION_AMDGPU_HSA_V5 = 3
378+
ELFABIVERSION_AMDGPU_HSA_V5 = 3,
379+
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
379380
};
380381

381382
#define ELF_RELOC(name, value) name = value,
@@ -842,6 +843,12 @@ enum : unsigned {
842843
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
843844
// SRAMECC is on.
844845
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
846+
847+
// Generic target versioning. This is contained in the list byte of EFLAGS.
848+
EF_AMDGPU_GENERIC_VERSION = 0xff000000,
849+
EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
850+
EF_AMDGPU_GENERIC_VERSION_MIN = 1,
851+
EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
845852
};
846853

847854
// ELF Relocation types for AMDGPU

llvm/include/llvm/Support/AMDGPUMetadata.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,15 @@ constexpr uint32_t VersionMajorV5 = 1;
4444
/// HSA metadata minor version for code object V5.
4545
constexpr uint32_t VersionMinorV5 = 2;
4646

47+
/// HSA metadata major version for code object V6.
48+
constexpr uint32_t VersionMajorV6 = 1;
49+
/// HSA metadata minor version for code object V6.
50+
constexpr uint32_t VersionMinorV6 = 3;
51+
4752
/// Old HSA metadata beginning assembler directive for V2. This is only used for
4853
/// diagnostics now.
54+
55+
/// HSA metadata beginning assembler directive.
4956
constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";
5057

5158
/// Access qualifiers.

llvm/include/llvm/Support/ScopedPrinter.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ class ScopedPrinter {
160160
template <typename T, typename TFlag>
161161
void printFlags(StringRef Label, T Value, ArrayRef<EnumEntry<TFlag>> Flags,
162162
TFlag EnumMask1 = {}, TFlag EnumMask2 = {},
163-
TFlag EnumMask3 = {}) {
163+
TFlag EnumMask3 = {}, TFlag EnumMask4 = {}) {
164164
SmallVector<FlagEntry, 10> SetFlags;
165165

166166
for (const auto &Flag : Flags) {
@@ -174,6 +174,8 @@ class ScopedPrinter {
174174
EnumMask = EnumMask2;
175175
else if (Flag.Value & EnumMask3)
176176
EnumMask = EnumMask3;
177+
else if (Flag.Value & EnumMask4)
178+
EnumMask = EnumMask4;
177179
bool IsEnum = (Flag.Value & EnumMask) != 0;
178180
if ((!IsEnum && (Value & Flag.Value) == Flag.Value) ||
179181
(IsEnum && (Value & EnumMask) == Flag.Value)) {

llvm/include/llvm/Target/TargetOptions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ namespace llvm {
129129
COV_3 = 300, // Unsupported.
130130
COV_4 = 400,
131131
COV_5 = 500,
132+
COV_6 = 600,
132133
};
133134

134135
class TargetOptions {

llvm/lib/ObjectYAML/ELFYAML.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,15 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
620620
BCase(EF_AMDGPU_FEATURE_XNACK_V3);
621621
BCase(EF_AMDGPU_FEATURE_SRAMECC_V3);
622622
break;
623+
case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
624+
for (unsigned K = ELF::EF_AMDGPU_GENERIC_VERSION_MIN;
625+
K <= ELF::EF_AMDGPU_GENERIC_VERSION_MAX; ++K) {
626+
std::string Key = "EF_AMDGPU_GENERIC_VERSION_V" + std::to_string(K);
627+
IO.maskedBitSetCase(Value, Key.c_str(),
628+
K << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET,
629+
ELF::EF_AMDGPU_GENERIC_VERSION);
630+
}
631+
[[fallthrough]];
623632
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
624633
case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
625634
BCaseMask(EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4,

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,9 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
335335
case AMDGPU::AMDHSA_COV5:
336336
HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV5());
337337
break;
338+
case AMDGPU::AMDHSA_COV6:
339+
HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV6());
340+
break;
338341
default:
339342
report_fatal_error("Unexpected code object version");
340343
}

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,16 @@ void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
678678
Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
679679
}
680680

681+
//===----------------------------------------------------------------------===//
682+
// HSAMetadataStreamerV6
683+
//===----------------------------------------------------------------------===//
684+
685+
void MetadataStreamerMsgPackV6::emitVersion() {
686+
auto Version = HSAMetadataDoc->getArrayNode();
687+
Version.push_back(Version.getDocument()->getNode(VersionMajorV6));
688+
Version.push_back(Version.getDocument()->getNode(VersionMinorV6));
689+
getRootMetadata("amdhsa.version") = Version;
690+
}
681691

682692
} // end namespace HSAMD
683693
} // end namespace AMDGPU

0 commit comments

Comments
 (0)