Skip to content

Commit 4c156a1

Browse files
committed
[AMDGPU] Change default AMDHSA Code Object version to 5
Also update LIT tests and docs. For more details, see https://llvm.org/docs/AMDGPUUsage.html#code-object-v5-metadata
1 parent d7fb9eb commit 4c156a1

File tree

12 files changed

+26
-26
lines changed

12 files changed

+26
-26
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4777,12 +4777,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
47774777
NegFlag<SetFalse, [], [ClangOption, CC1Option]>>, Group<m_Group>;
47784778

47794779
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
4780-
HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
4780+
HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
47814781
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
47824782
Values<"none,4,5">,
47834783
NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
47844784
NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
4785-
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
4785+
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_5">;
47864786

47874787
defm cumode : SimpleMFlag<"cumode",
47884788
"Specify CU wavefront", "Specify WGP wavefront",

clang/test/CodeGen/amdgpu-address-spaces.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234;
2929
// CHECK: @u = addrspace(5) global i32 undef, align 4
3030
// CHECK: @aaa = addrspace(6) global i32 1000, align 4
3131
// CHECK: @bbb = addrspace(999) global i32 1234, align 4
32-
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
32+
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
3333
//.
3434
// CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
3535
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {

clang/test/CodeGenCUDA/amdgpu-code-object-version.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Create module flag for code object version.
22

33
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
4-
// RUN: -o - %s | FileCheck %s -check-prefix=V4
4+
// RUN: -o - %s | FileCheck %s -check-prefix=V5
55

66
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
77
// RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s

clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
2-
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
2+
// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
33
// RUN: | FileCheck -check-prefix=PRECOV5 %s
44

55

66
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
7-
// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
7+
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
88
// RUN: | FileCheck -check-prefix=COV5 %s
99

1010
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \

clang/test/CodeGenHIP/default-attributes.hip

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,11 @@ __global__ void kernel() {
4646
// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
4747
// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
4848
//.
49-
// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
49+
// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
5050
// OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
5151
// OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
5252
//.
53-
// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
53+
// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
5454
// OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
5555
// OPT: !2 = !{i32 1, !"wchar_size", i32 4}
5656
//.

clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) {
703703
// GFX900: attributes #8 = { nounwind }
704704
// GFX900: attributes #9 = { convergent nounwind }
705705
//.
706-
// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
706+
// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
707707
// NOCPU: !1 = !{i32 1, !"wchar_size", i32 4}
708708
// NOCPU: !2 = !{i32 2, i32 0}
709709
// NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0}
@@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) {
721721
// NOCPU: !15 = !{i32 1}
722722
// NOCPU: !16 = !{!"int*"}
723723
//.
724-
// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
724+
// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
725725
// GFX900: !1 = !{i32 1, !"wchar_size", i32 4}
726726
// GFX900: !2 = !{i32 2, i32 0}
727727
// GFX900: !3 = !{!4, !4, i64 0}

clang/test/CodeGenOpenCL/builtins-amdgcn.cl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -601,13 +601,13 @@ void test_get_local_id(int d, global int *out)
601601
}
602602

603603
// CHECK-LABEL: @test_get_workgroup_size(
604-
// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
605-
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4
604+
// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
605+
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12
606606
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
607-
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6
607+
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14
608608
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
609-
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8
610-
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
609+
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16
610+
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
611611
void test_get_workgroup_size(int d, global int *out)
612612
{
613613
switch (d) {

llvm/docs/AMDGPUUsage.rst

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,12 +1510,12 @@ The AMDGPU backend uses the following ELF header:
15101510

15111511
* ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
15121512
runtime ABI for code object V4. Specify using the Clang option
1513-
``-mcode-object-version=4``. This is the default code object
1514-
version if not specified.
1513+
``-mcode-object-version=4``.
15151514

15161515
* ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA
15171516
runtime ABI for code object V5. Specify using the Clang option
1518-
``-mcode-object-version=5``.
1517+
``-mcode-object-version=5``. This is the default code object
1518+
version if not specified.
15191519

15201520
* ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
15211521
runtime ABI.
@@ -3949,6 +3949,10 @@ same *vendor-name*.
39493949
Code Object V4 Metadata
39503950
+++++++++++++++++++++++
39513951

3952+
. warning::
3953+
Code object V4 is not the default code object version emitted by this version
3954+
of LLVM.
3955+
39523956
Code object V4 metadata is the same as
39533957
:ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions
39543958
defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
@@ -3979,11 +3983,6 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
39793983
Code Object V5 Metadata
39803984
+++++++++++++++++++++++
39813985

3982-
.. warning::
3983-
Code object V5 is not the default code object version emitted by this version
3984-
of LLVM.
3985-
3986-
39873986
Code object V5 metadata is the same as
39883987
:ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table
39893988
:ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
3535
"amdhsa-code-object-version", llvm::cl::Hidden,
36-
llvm::cl::init(llvm::AMDGPU::AMDHSA_COV4),
36+
llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
3737
llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
3838
"or asm directive still take priority if present)"));
3939

mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
264264

265265
// This constant must always match the default code object ABI version
266266
// of the AMDGPU backend.
267-
addControlConstant("__oclc_ABI_version", 400, 32);
267+
addControlConstant("__oclc_ABI_version", 500, 32);
268268
}
269269

270270
// Determine libraries we need to link - order matters due to dependencies

mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ class ROCDLDialectLLVMIRTranslationInterface
9999
if (!llvmFunc->hasFnAttribute("amdgpu-flat-work-group-size")) {
100100
llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1,256");
101101
}
102+
llvmFunc->addFnAttr("amdgpu-implicitarg-num-bytes", "256");
102103
}
103104
// Override flat-work-group-size
104105
// TODO: update clients to rocdl.flat_work_group_size instead,

mlir/test/Target/LLVMIR/rocdl.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
489489
llvm.return %source5 : i32
490490
}
491491

492-
// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" }
492+
// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="256" }
493493
// CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024"
494494
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"
495495
// CHECK-DAG: ![[$RANGE]] = !{i32 0, i32 64}

0 commit comments

Comments
 (0)