Skip to content

[AMDGPU] Update llvm-objdump lit tests for COV5 #79039

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,9 @@ AMDGPU Support
arguments in C ABI. Callee is responsible for allocating stack memory and
copying the value of the struct if modified. Note that AMDGPU backend still
supports byval for struct arguments.
- The default value for ``-mcode-object-version`` is now 5.
See `AMDHSA Code Object V5 Metadata <https://llvm.org/docs/AMDGPUUsage.html#code-object-v5-metadata>`_
for more details.

X86 Support
^^^^^^^^^^^
Expand Down
4 changes: 2 additions & 2 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -4777,12 +4777,12 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
NegFlag<SetFalse, [], [ClangOption, CC1Option]>>, Group<m_Group>;

def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
Values<"none,4,5">,
NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_5">;

defm cumode : SimpleMFlag<"cumode",
"Specify CU wavefront", "Specify WGP wavefront",
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/amdgpu-address-spaces.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234;
// CHECK: @u = addrspace(5) global i32 undef, align 4
// CHECK: @aaa = addrspace(6) global i32 1000, align 4
// CHECK: @bbb = addrspace(999) global i32 1234, align 4
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
//.
// CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Create module flag for code object version.

// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -o - %s | FileCheck %s -check-prefix=V4
// RUN: -o - %s | FileCheck %s -check-prefix=V5

// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
// RUN: -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=PRECOV5 %s


// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COV5 %s

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenHIP/default-attributes.hip
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ __global__ void kernel() {
// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
//.
// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// OPTNONE: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
//.
// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// OPT: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPT: !2 = !{i32 1, !"wchar_size", i32 4}
//.
4 changes: 2 additions & 2 deletions clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900: attributes #8 = { nounwind }
// GFX900: attributes #9 = { convergent nounwind }
//.
// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// NOCPU: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// NOCPU: !1 = !{i32 1, !"wchar_size", i32 4}
// NOCPU: !2 = !{i32 2, i32 0}
// NOCPU: !3 = !{i32 1, i32 0, i32 1, i32 0}
Expand All @@ -721,7 +721,7 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU: !15 = !{i32 1}
// NOCPU: !16 = !{!"int*"}
//.
// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
// GFX900: !0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
// GFX900: !1 = !{i32 1, !"wchar_size", i32 4}
// GFX900: !2 = !{i32 2, i32 0}
// GFX900: !3 = !{!4, !4, i64 0}
Expand Down
10 changes: 5 additions & 5 deletions clang/test/CodeGenOpenCL/builtins-amdgcn.cl
Original file line number Diff line number Diff line change
Expand Up @@ -601,13 +601,13 @@ void test_get_local_id(int d, global int *out)
}

// CHECK-LABEL: @test_get_workgroup_size(
// CHECK: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 4
// CHECK: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 6
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 8
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16
// CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
void test_get_workgroup_size(int d, global int *out)
{
switch (d) {
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Driver/driver-help-hidden.f90
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@
! CHECK-NEXT: -L <dir> Add directory to library search path
! CHECK-NEXT: -march=<value> For a list of available architectures for the target use '-mcpu=help'
! CHECK-NEXT: -mcode-object-version=<value>
! CHECK-NEXT: Specify code object ABI version. Defaults to 4. (AMDGPU only)
! CHECK-NEXT: Specify code object ABI version. Defaults to 5. (AMDGPU only)
! CHECK-NEXT: -mcpu=<value> For a list of available CPUs for the target use '-mcpu=help'
! CHECK-NEXT: -mllvm=<arg> Alias for -mllvm
! CHECK-NEXT: -mllvm <value> Additional arguments to forward to LLVM's option processing
Expand Down
4 changes: 2 additions & 2 deletions flang/test/Driver/driver-help.f90
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
! HELP-NEXT: -L <dir> Add directory to library search path
! HELP-NEXT: -march=<value> For a list of available architectures for the target use '-mcpu=help'
! HELP-NEXT: -mcode-object-version=<value>
! HELP-NEXT: Specify code object ABI version. Defaults to 4. (AMDGPU only)
! HELP-NEXT: Specify code object ABI version. Defaults to 5. (AMDGPU only)
! HELP-NEXT: -mcpu=<value> For a list of available CPUs for the target use '-mcpu=help'
! HELP-NEXT: -mllvm=<arg> Alias for -mllvm
! HELP-NEXT: -mllvm <value> Additional arguments to forward to LLVM's option processing
Expand Down Expand Up @@ -240,7 +240,7 @@
! HELP-FC1-NEXT: -I <dir> Add directory to the end of the list of include search paths
! HELP-FC1-NEXT: -load <dsopath> Load the named plugin (dynamic shared object)
! HELP-FC1-NEXT: -mcode-object-version=<value>
! HELP-FC1-NEXT: Specify code object ABI version. Defaults to 4. (AMDGPU only)
! HELP-FC1-NEXT: Specify code object ABI version. Defaults to 5. (AMDGPU only)
! HELP-FC1-NEXT: -menable-no-infs Allow optimization to assume there are no infinities.
! HELP-FC1-NEXT: -menable-no-nans Allow optimization to assume there are no NaNs.
! HELP-FC1-NEXT: -mframe-pointer=<value> Specify which frame pointers to retain.
Expand Down
15 changes: 7 additions & 8 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1510,12 +1510,12 @@ The AMDGPU backend uses the following ELF header:

* ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
runtime ABI for code object V4. Specify using the Clang option
``-mcode-object-version=4``. This is the default code object
version if not specified.
``-mcode-object-version=4``.

* ``ELFABIVERSION_AMDGPU_HSA_V5`` is used to specify the version of AMD HSA
runtime ABI for code object V5. Specify using the Clang option
``-mcode-object-version=5``.
``-mcode-object-version=5``. This is the default code object
version if not specified.

* ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
runtime ABI.
Expand Down Expand Up @@ -3949,6 +3949,10 @@ same *vendor-name*.
Code Object V4 Metadata
+++++++++++++++++++++++

. warning::
Code object V4 is not the default code object version emitted by this version
of LLVM.

Code object V4 metadata is the same as
:ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions
defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
Expand Down Expand Up @@ -3979,11 +3983,6 @@ defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`.
Code Object V5 Metadata
+++++++++++++++++++++++

.. warning::
Code object V5 is not the default code object version emitted by this version
of LLVM.


Code object V5 metadata is the same as
:ref:`amdgpu-amdhsa-code-object-metadata-v4` with the changes defined in table
:ref:`amdgpu-amdhsa-code-object-metadata-map-table-v5`, table
Expand Down
2 changes: 2 additions & 0 deletions llvm/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ Changes to the AMDGPU Backend

* Implemented :ref:`llvm.get.rounding <int_get_rounding>`

* The default :ref:`AMDHSA code object version <amdgpu-amdhsa-code-object-metadata-v5>` is now 5.

Changes to the ARM Backend
--------------------------

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
"amdhsa-code-object-version", llvm::cl::Hidden,
llvm::cl::init(llvm::AMDGPU::AMDHSA_COV4),
llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
"or asm directive still take priority if present)"));

Expand Down
4 changes: 4 additions & 0 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 1
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
Expand Down Expand Up @@ -101,6 +102,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
Expand Down Expand Up @@ -154,6 +156,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
Expand Down Expand Up @@ -207,6 +210,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 1
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
Expand Down Expand Up @@ -103,6 +104,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
Expand Down Expand Up @@ -157,6 +159,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
Expand Down Expand Up @@ -211,6 +214,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 1
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
Expand Down Expand Up @@ -97,6 +98,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_wavefront_size32 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx90a.s
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
Expand Down Expand Up @@ -95,6 +96,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
Expand Down Expand Up @@ -145,6 +147,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 2
; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 1
; CHECK-NEXT: .end_amdhsa_kernel
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
Expand Down Expand Up @@ -95,6 +96,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
Expand Down Expand Up @@ -146,6 +148,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 0
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 23
Expand Down Expand Up @@ -90,6 +91,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 14
Expand Down Expand Up @@ -137,6 +139,7 @@
; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; CHECK-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; CHECK-NEXT: .amdhsa_uses_dynamic_stack 0
; CHECK-NEXT: .end_amdhsa_kernel
.amdhsa_kernel kernel
.amdhsa_next_free_vgpr 32
Expand Down
1 change: 1 addition & 0 deletions llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
; OBJDUMP-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
; OBJDUMP-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; OBJDUMP-NEXT: .amdhsa_wavefront_size32 0
; OBJDUMP-NEXT: .amdhsa_uses_dynamic_stack 0
; OBJDUMP-NEXT: .end_amdhsa_kernel

.amdhsa_kernel my_kernel
Expand Down
2 changes: 1 addition & 1 deletion mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) {

// This constant must always match the default code object ABI version
// of the AMDGPU backend.
addControlConstant("__oclc_ABI_version", 400, 32);
addControlConstant("__oclc_ABI_version", 500, 32);
}

// Determine libraries we need to link - order matters due to dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class ROCDLDialectLLVMIRTranslationInterface
if (!llvmFunc->hasFnAttribute("amdgpu-flat-work-group-size")) {
llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1,256");
}
llvmFunc->addFnAttr("amdgpu-implicitarg-num-bytes", "256");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wasn't this in the other review as well?

Copy link
Contributor Author

@saiislam saiislam Jan 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR depends on #79038 (the other review) and they both need to land together. When I created stacked PRs, it seems that Github brought both the commits together in this PR.

You may use this to see changes of this review only.

}
// Override flat-work-group-size
// TODO: update clients to rocdl.flat_work_group_size instead,
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Target/LLVMIR/rocdl.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
llvm.return %source5 : i32
}

// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" }
// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-implicitarg-num-bytes"="256" }
// CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024"
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"
// CHECK-DAG: ![[$RANGE]] = !{i32 0, i32 64}
Expand Down