-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Update llvm-objdump lit tests for COV5 #79039
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-driver @llvm/pr-subscribers-mlir-llvm Author: Saiyedul Islam (saiislam) ChangesDepends on #79038 which makes cov5 as the default code Full diff: https://github.com/llvm/llvm-project/pull/79039.diff 19 Files Affected:
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index f9e883e3e22de8..d4b82b301f12e6 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4777,12 +4777,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
NegFlag<SetFalse, [], [ClangOption, CC1Option]>>, Group<m_Group>;
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
- HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
+ HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
Values<"none,4,5">,
NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
- MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
+ MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_5">;
defm cumode : SimpleMFlag<"cumode",
"Specify CU wavefront", "Specify WGP wavefront",
diff --git a/clang/test/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CodeGen/amdgpu-address-spaces.cpp
index 0a808aa6cc75ed..ae2c61439f4ca5 100644
--- a/clang/test/CodeGen/amdgpu-address-spaces.cpp
+++ b/clang/test/CodeGen/amdgpu-address-spaces.cpp
@@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234;
// CHECK: @u = addrspace(5) global i32 undef, align 4
// CHECK: @aaa = addrspace(6) global i32 1000, align 4
// CHECK: @bbb = addrspace(999) global i32 1234, align 4
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
+// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
//.
// CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index ff5deaf9ab850d..3cb6632fc0b63d 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -1,7 +1,7 @@
// Create module flag for code object version.
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN: -o - %s | FileCheck %s -check-prefix=V4
+// RUN: -o - %s | FileCheck %s -check-prefix=V5
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index 282e0a49b9aa10..0c846e0936b58b 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -1,10 +1,10 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
+// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=PRECOV5 %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
+// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COV5 %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index 80aa1ee0700628..9c9ea521271b99 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -46,11 +46,11 @@ __global__ void kernel() {
// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
//.
-// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
+// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
//.
-// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
+// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPT: !2 = !{i32 1, !"wchar_size", i32 4}
//.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index e574b1f64c499b..2cf1286e2b54e8 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900: attributes #8 = { nounwind }
// GFX900: attributes #9 = { convergent nounwind }
//.
-// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
+// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// NOCPU: !1 = !{i32 1, !"wchar_size", i32 4}
// NOCPU: !2 = !{i32 2, i32 0}
// NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0}
@@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU: !15 = !{i32 1}
// NOCPU: !16 = !{!"int*"}
//.
-// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
+// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// GFX900: !1 = !{i32 1, !"wchar_size", i32 4}
// GFX900: !2 = !{i32 2, i32 0}
// GFX900: !3 = !{!4, !4, i64 0}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 0bc9a54682d3e3..8d9e4e018b12e5 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -601,13 +601,13 @@ void test_get_local_id(int d, global int *out)
}
// CHECK-LABEL: @test_get_workgroup_size(
-// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4
+// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8
-// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16
+// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
void test_get_workgroup_size(int d, global int *out)
{
switch (d) {
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 548d677afdecb8..6b2417143ca06c 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1510,12 +1510,12 @@ The AMDGPU backend uses the following ELF header:
* ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
runtime ABI for code object V4. Specify using the Clang option
- ``-mcode-object-version=4``. This is the default code object
- version if not specified.
+ ``-mcode-object-version=4``.
* ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA
runtime ABI for code object V5. Specify using the Clang option
- ``-mcode-object-version=5``.
+ ``-mcode-object-version=5``. This is the default code object
+ version if not specified.
* ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
runtime ABI.
@@ -3949,6 +3949,10 @@ same *vendor-name*.
Code Object V4 Metadata
+++++++++++++++++++++++
+. warning::
+ Code object V4 is not the default code object version emitted by this version
+ of LLVM.
+
Code object V4 metadata is the same as
:ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions
defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
@@ -3979,11 +3983,6 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
Code Object V5 Metadata
+++++++++++++++++++++++
-.. warning::
- Code object V5 is not the default code object version emitted by this version
- of LLVM.
-
-
Code object V5 metadata is the same as
:ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table
:ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index f1c05446bf6069..0bf9452d822e97 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -33,7 +33,7 @@
static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
"amdhsa-code-object-version", llvm::cl::Hidden,
- llvm::cl::init(llvm::AMDGPU::AMDHSA_COV4),
+ llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
"or asm directive still take priority if present)"));
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
index 58b01031afe383..781729d5c4cc1a 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
@@ -48,6 +48,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 1
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
@@ -101,6 +102,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
@@ -154,6 +156,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
@@ -207,6 +210,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
index 2133002908d9fc..019c20754f389d 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
@@ -49,6 +49,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 1
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
@@ -103,6 +104,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
@@ -157,6 +159,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
@@ -211,6 +214,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s
index e1d312d6035cb7..86af4810059ecd 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s
@@ -46,6 +46,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 1
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
@@ -97,6 +98,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s
index d26189451829f7..4978f6974fd33c 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s
@@ -45,6 +45,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
@@ -95,6 +96,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
@@ -145,6 +147,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 2
; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 1
; CHECK-NEXT: .end_amdhsa_kernel
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s
index 1f6f134cd67eff..a40cf1d3776932 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s
@@ -44,6 +44,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
@@ -95,6 +96,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
@@ -146,6 +148,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s
index 4d385a1c885780..b6b9c91b142462 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s
@@ -43,6 +43,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 23
@@ -90,6 +91,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 14
@@ -137,6 +139,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
index 39cef4da4278df..39739c957350cb 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
@@ -62,6 +62,7 @@
; OBJDUMP-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; OBJDUMP-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; OBJDUMP-NEXT: .amdhsa_wavefront_size32 0
+; OBJDUMP-NEXT: .amdhsa_uses_dynamic_stack 0
; OBJDUMP-NEXT: .end_amdhsa_kernel
.amdhsa_kernel my_kernel
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
index 5cce7befce5283..eee7a680f5b3bf 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
@@ -264,7 +264,7 @@ SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
// This constant must always match the default code object ABI version
// of the AMDGPU backend.
- addControlConstant("__oclc_ABI_version", 400, 32);
+ addControlConstant("__oclc_ABI_version", 500, 32);
}
// Determine libraries we need to link - order matters due to dependencies
diff --git a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
index cbce23fd580e75..a230ead7c18831 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp
@@ -99,6 +99,7 @@ class ROCDLDialectLLVMIRTranslationInterface
if (!llvmFunc->hasFnAttribute("amdgpu-flat-work-group-size")) {
llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1,256");
}
+ llvmFunc->addFnAttr("amdgpu-implicitarg-num-bytes", "256");
}
// Override flat-work-group-size
// TODO: update clients to rocdl.flat_work_group_size instead,
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 3c9c70711ae230..f831d7bba864c8 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -489,7 +489,7 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
llvm.return %source5 : i32
}
-// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" }
+// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="256" }
// CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024"
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"
// CHECK-DAG: ![[$RANGE]] = !{i32 0, i32 64}
|
@@ -99,6 +99,7 @@ class ROCDLDialectLLVMIRTranslationInterface | |||
if (!llvmFunc->hasFnAttribute("amdgpu-flat-work-group-size")) { | |||
llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1,256"); | |||
} | |||
llvmFunc->addFnAttr("amdgpu-implicitarg-num-bytes", "256"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wasn't this in the other review as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also update LIT tests, docs, and release notes for Clang and LLVM. For more details, see https://llvm.org/docs/AMDGPUUsage.html#code-object-v5-metadata
Depends on llvm#79038 which makes cov5 as the default code object version.
Depends on #79038 which makes cov5 as the default code
object version.