-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][NFC] Update left over tests for COV5 #76984
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-amdgpu Author: Saiyedul Islam (saiislam) ChangesUpdate AMDGPU CodeGen lit tests to check for COV5 ABI. Patch is 32.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76984.diff 13 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-noopt.ll b/llvm/test/CodeGen/AMDGPU/attributor-noopt.ll
index d8388463554621..a374689da5736f 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-noopt.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-noopt.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=OPT %s
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=NOOPT %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefix=OPT %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=NOOPT,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=NOOPT,COV5 %s
; Check that AMDGPUAttributor is not run with -O0.
; OPT: .amdhsa_user_sgpr_private_segment_buffer 1
@@ -18,7 +19,8 @@
; NOOPT: .amdhsa_user_sgpr_private_segment_buffer 1
; NOOPT: .amdhsa_user_sgpr_dispatch_ptr 1
-; NOOPT: .amdhsa_user_sgpr_queue_ptr 1
+; COV4: .amdhsa_user_sgpr_queue_ptr 1
+; COV5: .amdhsa_user_sgpr_queue_ptr 0
; NOOPT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; NOOPT: .amdhsa_user_sgpr_dispatch_id 1
; NOOPT: .amdhsa_user_sgpr_flat_scratch_init 0
@@ -32,3 +34,6 @@
define amdgpu_kernel void @foo() {
ret void
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
index 6ff2dbabc8ecd5..2fdc9edad9e2d0 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
@@ -1,5 +1,7 @@
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefixes=ALL,GFX908 %s
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,GFX90A %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 | FileCheck -check-prefixes=ALL,GFX908,GFX908_V4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a | FileCheck -check-prefixes=ALL,GFX90A,GFX90A_V4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 | FileCheck -check-prefixes=ALL,GFX908,GFX908_V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a | FileCheck -check-prefixes=ALL,GFX90A,GFX90A_V5 %s
; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
; through aliases. If GlobalOpt is never run, we do not see direct
@@ -9,10 +11,13 @@
; ALL-LABEL: {{^}}kernel:
; GFX908: .amdhsa_next_free_vgpr 32
-; GFX908-NEXT: .amdhsa_next_free_sgpr 36
+; GFX908_V4: .amdhsa_next_free_sgpr 36
+; GFX908_V5: .amdhsa_next_free_sgpr 33
-; GFX90A: .amdhsa_next_free_vgpr 65
-; GFX90A-NEXT: .amdhsa_next_free_sgpr 36
+; GFX90A_V4: .amdhsa_next_free_vgpr 65
+; GFX90A_V4-NEXT: .amdhsa_next_free_sgpr 36
+; GFX90A_V5: .amdhsa_next_free_vgpr 59
+; GFX90A_V5-NEXT: .amdhsa_next_free_sgpr 33
; GFX90A-NEXT: .amdhsa_accum_offset 32
define amdgpu_kernel void @kernel() #0 {
bb:
@@ -29,3 +34,6 @@ bb:
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn }
attributes #2 = { nounwind readnone willreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
index 797376535cde06..25b33857ae07fd 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV5 %s
; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
; through aliases. If GlobalOpt is never run, we do not see direct
@@ -8,7 +9,8 @@
; CHECK-LABEL: {{^}}kernel0:
; CHECK: .amdhsa_next_free_vgpr 53
-; CHECK-NEXT: .amdhsa_next_free_sgpr 36
+; COV4: .amdhsa_next_free_sgpr 36
+; COV5: .amdhsa_next_free_sgpr 33
define amdgpu_kernel void @kernel0() #0 {
bb:
call void @alias0() #2
@@ -24,3 +26,6 @@ bb:
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn }
attributes #2 = { nounwind readnone willreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
index 79bb2fb64117c9..ec7fd6d8617714 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV5 %s
; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
; through aliases. If GlobalOpt is never run, we do not see direct
@@ -10,7 +11,8 @@
; CHECK-LABEL: {{^}}kernel1:
; CHECK: .amdhsa_next_free_vgpr 41
-; CHECK-NEXT: .amdhsa_next_free_sgpr 36
+; COV4: .amdhsa_next_free_sgpr 36
+; COV5: .amdhsa_next_free_sgpr 33
define amdgpu_kernel void @kernel1() #0 {
bb:
call void asm sideeffect "; clobber v40 ", "~{v40}"()
@@ -27,3 +29,6 @@ bb:
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="8,10" }
attributes #2 = { nounwind readnone willreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
index 5745dd9fdb67c3..4d5e466a218126 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV5 %s
; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
; through aliases. If GlobalOpt is never run, we do not see direct
@@ -8,7 +9,8 @@
; CHECK-LABEL: {{^}}kernel2:
; CHECK: .amdhsa_next_free_vgpr 53
-; CHECK-NEXT: .amdhsa_next_free_sgpr 36
+; COV4: .amdhsa_next_free_sgpr 36
+; COV5: .amdhsa_next_free_sgpr 33
define amdgpu_kernel void @kernel2() #0 {
bb:
call void @alias2() #2
@@ -24,3 +26,6 @@ bb:
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="4,10" }
attributes #2 = { nounwind readnone willreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
index b922297c493fab..428dac2dac2b2f 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV5 %s
; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
; through aliases. If GlobalOpt is never run, we do not see direct
@@ -8,7 +9,8 @@
; CHECK-LABEL: {{^}}kernel3:
; CHECK: .amdhsa_next_free_vgpr 253
-; CHECK-NEXT: .amdhsa_next_free_sgpr 36
+; COV4: .amdhsa_next_free_sgpr 36
+; COV5: .amdhsa_next_free_sgpr 33
define amdgpu_kernel void @kernel3() #0 {
bb:
call void @alias3() #2
@@ -24,3 +26,6 @@ bb:
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,1" }
attributes #2 = { nounwind readnone willreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
index 084b9686f88a38..ce478d41380afd 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
@@ -28,7 +28,6 @@ define i32 @divergent_lshr_and_cmp(i32 %x) {
entry:
%0 = and i32 %x, 2
%1 = icmp ne i32 %0, 0
- ; Prevent removal of truncate in SDag by inserting llvm.amdgcn.if
br i1 %1, label %out.true, label %out.else
out.true:
@@ -43,9 +42,9 @@ define amdgpu_kernel void @uniform_opt_lshr_and_cmp(ptr addrspace(1) %out, i32 %
; GCN-LABEL: name: uniform_opt_lshr_and_cmp
; GCN: bb.0.entry:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; GCN-NEXT: liveins: $sgpr4_sgpr5
+ ; GCN-NEXT: liveins: $sgpr2_sgpr3
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset, addrspace 4)
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
@@ -84,7 +83,6 @@ define amdgpu_kernel void @uniform_opt_lshr_and_cmp(ptr addrspace(1) %out, i32 %
entry:
%0 = and i32 %x, 2
%1 = icmp ne i32 %0, 0
- ; Don't optimize the truncate in the SDag away.
br i1 %1, label %out.true, label %out.else
out.true:
@@ -96,3 +94,6 @@ out.else:
store i1 %1, ptr addrspace(1) %out
ret void
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
index e2e1effc9b406b..3be2d94351192b 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -49,7 +49,7 @@ define amdgpu_kernel void @fneg_fabsf_free_f32(ptr addrspace(1) %out, i32 %in) {
; R600: |PV.{{[XYZW]}}|
; R600: -PV
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI: s_load_dwordx2 s[0:1], s[2:3], 0x9
define amdgpu_kernel void @fneg_fabsf_fn_free_f32(ptr addrspace(1) %out, i32 %in) {
%bc = bitcast i32 %in to float
%fabs = call float @fabsf(float %bc)
@@ -109,3 +109,6 @@ declare float @fabsf(float) readnone
declare float @llvm.fabs.f32(float) readnone
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
index ea5add023d15cf..3973cf1eec831e 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
@@ -36,6 +36,7 @@
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GCN-NEXT: .amdhsa_wavefront_size32
+; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
; GCN-NEXT: .amdhsa_enable_private_segment 0
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -64,6 +65,7 @@ define amdgpu_kernel void @minimal_kernel_inputs() {
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GCN-NEXT: .amdhsa_wavefront_size32
+; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
; GCN-NEXT: .amdhsa_enable_private_segment 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -81,7 +83,7 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() {
}
; GCN-LABEL: {{^}}queue_ptr:
-; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1]
+; GCN: global_load_u8 v{{[0-9]+}},
; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15
; NOWORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s2
@@ -91,11 +93,12 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() {
; WORKAROUND: .amdhsa_user_sgpr_count 15
; NOWORKAROUND: .amdhsa_user_sgpr_count 2
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
-; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
-; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GCN-NEXT: .amdhsa_wavefront_size32
+; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
; GCN-NEXT: .amdhsa_enable_private_segment 0
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -117,16 +120,16 @@ define amdgpu_kernel void @queue_ptr() {
; WORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s14
; WORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s15
-; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s8
-; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s9
-; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s10
+; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s6
+; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s7
+; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s8
; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1]
+; GCN: global_load_u8 v{{[0-9]+}},
; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[2:3]
-; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[4:5]
-; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s6
-; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s7
+; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s4
+; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s5
; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_X]], off
; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_Y]], off
@@ -135,13 +138,14 @@ define amdgpu_kernel void @queue_ptr() {
; GCN: .amdhsa_kernel all_inputs
; WORKAROUND: .amdhsa_user_sgpr_count 13
-; NOWORKAROUND: .amdhsa_user_sgpr_count 8
+; NOWORKAROUND: .amdhsa_user_sgpr_count 6
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
+; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 1
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GCN-NEXT: .amdhsa_wavefront_size32
+; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
; GCN-NEXT: .amdhsa_enable_private_segment 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
@@ -149,7 +153,7 @@ define amdgpu_kernel void @queue_ptr() {
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 13
-; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 8
+; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 6
define amdgpu_kernel void @all_inputs() {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
@@ -188,3 +192,6 @@ declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
attributes #0 = { nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
index 7479fc87e31883..2672c12ecf1fc5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
@@ -1,15 +1,16 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIH %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; GCN-LABEL: {{^}}is_local_vgpr:
; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
-; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
+; CI-DAG: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
; GFX9: v_cmp_eq_u32_e32 vcc, s[[HI]], v[[PTR_HI]]
-; CI: v_cmp_eq_u32_e32 vcc, [[APERTURE]], v[[PTR_HI]]
+; CIT: v_cmp_eq_u32_e32 vcc, s4, v[[PTR_HI]]
+; CIH: v_cmp_eq_u32_e32 vcc, s2, v[[PTR_HI]]
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
%id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -25,15 +26,15 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
; select and vcc branch.
; GCN-LABEL: {{^}}is_local_sgpr:
-; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
+; CI-DAG: s_load_dword s0, s[4:5], 0x1
-; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x1{{$}}
+; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x33{{$}}
; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x4{{$}}
; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
; GFX9: s_cmp_eq_u32 [[PTR_HI]], s[[HI]]
-; CI: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]
+; CI: s_cmp_eq_u32 s0, [[PTR_HI]]
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
%val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
@@ -51,3 +52,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0
attributes #0 = { nounwind readnone speculatable }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
index ec83d7f313d6af..a8bb36bc0d19e8 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
@@ -77,7 +77,7 @@ declare i32 @foo(ptr addrspace(5)) #0
; ASM: buffer_store_dword
; ASM: buffer_store_dword
; ASM: s_swappc_b64
-; ASM: ScratchSize: 16400
+; ASM: ScratchSize: 16
define amdgpu_kernel void @call_private(ptr addrspace(1) %out, i32 %in) #0 {
entry:
%tmp = alloca [2 x i32], addrspace(5)
@@ -94,3 +94,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" }
attributes #1 = { nounwind readnone }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
index ecdc3845efc4b7..7d7917e0b20cfe 100644
--- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
@@ -24,7 +24,7 @@ define amdgpu_kernel void @volatile_load_group_size_x(ptr addrspace(1) %out) #0
}
; CHECK-LABEL: @load_group_s...
[truncated]
|
arsenm
reviewed
Jan 5, 2024
Update AMDGPU CodeGen lit tests to check for COV5 ABI.
arsenm
approved these changes
Jan 9, 2024
justinfargnoli
pushed a commit
to justinfargnoli/llvm-project
that referenced
this pull request
Jan 28, 2024
Update AMDGPU CodeGen lit tests to check for COV5 ABI.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Update AMDGPU CodeGen lit tests to check for COV5 ABI.