Skip to content

[AMDGPU][NFC] Update left over tests for COV5 #76984

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 9, 2024
Merged

Conversation

saiislam
Copy link
Contributor

@saiislam saiislam commented Jan 4, 2024

Update AMDGPU CodeGen lit tests to check for COV5 ABI.

@llvmbot
Copy link
Member

llvmbot commented Jan 4, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Saiyedul Islam (saiislam)

Changes

Update AMDGPU CodeGen lit tests to check for COV5 ABI.


Patch is 32.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76984.diff

13 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/attributor-noopt.ll (+8-3)
  • (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll (+13-5)
  • (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll (+7-2)
  • (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll (+7-2)
  • (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll (+7-2)
  • (modified) llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll (+7-2)
  • (modified) llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll (+5-4)
  • (modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.ll (+4-1)
  • (modified) llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll (+19-12)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll (+11-7)
  • (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll (+4-1)
  • (modified) llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll (+27-24)
  • (modified) llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll (+6-3)
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-noopt.ll b/llvm/test/CodeGen/AMDGPU/attributor-noopt.ll
index d8388463554621..a374689da5736f 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-noopt.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-noopt.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=OPT %s
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=NOOPT %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefix=OPT %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=NOOPT,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=NOOPT,COV5 %s
 
 ; Check that AMDGPUAttributor is not run with -O0.
 ; OPT: .amdhsa_user_sgpr_private_segment_buffer 1
@@ -18,7 +19,8 @@
 
 ; NOOPT: .amdhsa_user_sgpr_private_segment_buffer 1
 ; NOOPT: .amdhsa_user_sgpr_dispatch_ptr 1
-; NOOPT: .amdhsa_user_sgpr_queue_ptr 1
+; COV4: .amdhsa_user_sgpr_queue_ptr 1
+; COV5: .amdhsa_user_sgpr_queue_ptr 0
 ; NOOPT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
 ; NOOPT: .amdhsa_user_sgpr_dispatch_id 1
 ; NOOPT: .amdhsa_user_sgpr_flat_scratch_init 0
@@ -32,3 +34,6 @@
 define amdgpu_kernel void @foo() {
   ret void
 }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
index 6ff2dbabc8ecd5..2fdc9edad9e2d0 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
@@ -1,5 +1,7 @@
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefixes=ALL,GFX908 %s
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,GFX90A %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 | FileCheck -check-prefixes=ALL,GFX908,GFX908_V4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a | FileCheck -check-prefixes=ALL,GFX90A,GFX90A_V4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 | FileCheck -check-prefixes=ALL,GFX908,GFX908_V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a | FileCheck -check-prefixes=ALL,GFX90A,GFX90A_V5 %s
 
 ; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
 ; through aliases. If GlobalOpt is never run, we do not see direct
@@ -9,10 +11,13 @@
 
 ; ALL-LABEL: {{^}}kernel:
 ; GFX908: .amdhsa_next_free_vgpr 32
-; GFX908-NEXT: .amdhsa_next_free_sgpr 36
+; GFX908_V4: .amdhsa_next_free_sgpr 36
+; GFX908_V5: .amdhsa_next_free_sgpr 33
 
-; GFX90A: .amdhsa_next_free_vgpr 65
-; GFX90A-NEXT: .amdhsa_next_free_sgpr 36
+; GFX90A_V4: .amdhsa_next_free_vgpr 65
+; GFX90A_V4-NEXT: .amdhsa_next_free_sgpr 36
+; GFX90A_V5: .amdhsa_next_free_vgpr 59
+; GFX90A_V5-NEXT: .amdhsa_next_free_sgpr 33
 ; GFX90A-NEXT: .amdhsa_accum_offset 32
 define amdgpu_kernel void @kernel() #0 {
 bb:
@@ -29,3 +34,6 @@ bb:
 attributes #0 = { noinline norecurse nounwind optnone }
 attributes #1 = { noinline norecurse nounwind readnone willreturn }
 attributes #2 = { nounwind readnone willreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
index 797376535cde06..25b33857ae07fd 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV5 %s
 
 ; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
 ; through aliases. If GlobalOpt is never run, we do not see direct
@@ -8,7 +9,8 @@
 
 ; CHECK-LABEL: {{^}}kernel0:
 ; CHECK: .amdhsa_next_free_vgpr 53
-; CHECK-NEXT: .amdhsa_next_free_sgpr 36
+; COV4: .amdhsa_next_free_sgpr 36
+; COV5: .amdhsa_next_free_sgpr 33
 define amdgpu_kernel void @kernel0() #0 {
 bb:
   call void @alias0() #2
@@ -24,3 +26,6 @@ bb:
 attributes #0 = { noinline norecurse nounwind optnone }
 attributes #1 = { noinline norecurse nounwind readnone willreturn }
 attributes #2 = { nounwind readnone willreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
index 79bb2fb64117c9..ec7fd6d8617714 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV5 %s
 
 ; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
 ; through aliases. If GlobalOpt is never run, we do not see direct
@@ -10,7 +11,8 @@
 
 ; CHECK-LABEL: {{^}}kernel1:
 ; CHECK: .amdhsa_next_free_vgpr 41
-; CHECK-NEXT: .amdhsa_next_free_sgpr 36
+; COV4: .amdhsa_next_free_sgpr 36
+; COV5: .amdhsa_next_free_sgpr 33
 define amdgpu_kernel void @kernel1() #0 {
 bb:
   call void asm sideeffect "; clobber v40 ", "~{v40}"()
@@ -27,3 +29,6 @@ bb:
 attributes #0 = { noinline norecurse nounwind optnone }
 attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="8,10" }
 attributes #2 = { nounwind readnone willreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
index 5745dd9fdb67c3..4d5e466a218126 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV5 %s
 
 ; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
 ; through aliases. If GlobalOpt is never run, we do not see direct
@@ -8,7 +9,8 @@
 
 ; CHECK-LABEL: {{^}}kernel2:
 ; CHECK: .amdhsa_next_free_vgpr 53
-; CHECK-NEXT: .amdhsa_next_free_sgpr 36
+; COV4: .amdhsa_next_free_sgpr 36
+; COV5: .amdhsa_next_free_sgpr 33
 define amdgpu_kernel void @kernel2() #0 {
 bb:
   call void @alias2() #2
@@ -24,3 +26,6 @@ bb:
 attributes #0 = { noinline norecurse nounwind optnone }
 attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="4,10" }
 attributes #2 = { nounwind readnone willreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
index b922297c493fab..428dac2dac2b2f 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=CHECK,COV5 %s
 
 ; CallGraphAnalysis, which CodeGenSCC order depends on, does not look
 ; through aliases. If GlobalOpt is never run, we do not see direct
@@ -8,7 +9,8 @@
 
 ; CHECK-LABEL: {{^}}kernel3:
 ; CHECK: .amdhsa_next_free_vgpr 253
-; CHECK-NEXT: .amdhsa_next_free_sgpr 36
+; COV4: .amdhsa_next_free_sgpr 36
+; COV5: .amdhsa_next_free_sgpr 33
 define amdgpu_kernel void @kernel3() #0 {
 bb:
   call void @alias3() #2
@@ -24,3 +26,6 @@ bb:
 attributes #0 = { noinline norecurse nounwind optnone }
 attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,1" }
 attributes #2 = { nounwind readnone willreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
index 084b9686f88a38..ce478d41380afd 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
@@ -28,7 +28,6 @@ define i32 @divergent_lshr_and_cmp(i32 %x) {
 entry:
   %0 = and i32 %x, 2
   %1 = icmp ne i32 %0, 0
-  ; Prevent removal of truncate in SDag by inserting llvm.amdgcn.if
   br i1 %1, label %out.true, label %out.else
 
 out.true:
@@ -43,9 +42,9 @@ define amdgpu_kernel void @uniform_opt_lshr_and_cmp(ptr addrspace(1) %out, i32 %
   ; GCN-LABEL: name: uniform_opt_lshr_and_cmp
   ; GCN: bb.0.entry:
   ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; GCN-NEXT:   liveins: $sgpr4_sgpr5
+  ; GCN-NEXT:   liveins: $sgpr2_sgpr3
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
   ; GCN-NEXT:   [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
   ; GCN-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset, addrspace 4)
   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
@@ -84,7 +83,6 @@ define amdgpu_kernel void @uniform_opt_lshr_and_cmp(ptr addrspace(1) %out, i32 %
 entry:
   %0 = and i32 %x, 2
   %1 = icmp ne i32 %0, 0
-  ; Don't optimize the truncate in the SDag away.
   br i1 %1, label %out.true, label %out.else
 
 out.true:
@@ -96,3 +94,6 @@ out.else:
   store i1 %1, ptr addrspace(1) %out
   ret void
 }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
index e2e1effc9b406b..3be2d94351192b 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -49,7 +49,7 @@ define amdgpu_kernel void @fneg_fabsf_free_f32(ptr addrspace(1) %out, i32 %in) {
 ; R600: |PV.{{[XYZW]}}|
 ; R600: -PV
 
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI: s_load_dwordx2 s[0:1], s[2:3], 0x9
 define amdgpu_kernel void @fneg_fabsf_fn_free_f32(ptr addrspace(1) %out, i32 %in) {
   %bc = bitcast i32 %in to float
   %fabs = call float @fabsf(float %bc)
@@ -109,3 +109,6 @@ declare float @fabsf(float) readnone
 declare float @llvm.fabs.f32(float) readnone
 declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
 declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
index ea5add023d15cf..3973cf1eec831e 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
@@ -36,6 +36,7 @@
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; GCN-NEXT: .amdhsa_wavefront_size32
+; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
 ; GCN-NEXT: .amdhsa_enable_private_segment 0
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -64,6 +65,7 @@ define amdgpu_kernel void @minimal_kernel_inputs() {
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; GCN-NEXT: .amdhsa_wavefront_size32
+; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
 ; GCN-NEXT: .amdhsa_enable_private_segment 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -81,7 +83,7 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() {
 }
 
 ; GCN-LABEL: {{^}}queue_ptr:
-; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1]
+; GCN: global_load_u8 v{{[0-9]+}},
 
 ; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15
 ; NOWORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s2
@@ -91,11 +93,12 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() {
 ; WORKAROUND: .amdhsa_user_sgpr_count 15
 ; NOWORKAROUND: .amdhsa_user_sgpr_count 2
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
-; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
-; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; GCN-NEXT: .amdhsa_wavefront_size32
+; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
 ; GCN-NEXT: .amdhsa_enable_private_segment 0
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -117,16 +120,16 @@ define amdgpu_kernel void @queue_ptr() {
 ; WORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s14
 ; WORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s15
 
-; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s8
-; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s9
-; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s10
+; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s6
+; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s7
+; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s8
 
 ; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1]
+; GCN: global_load_u8 v{{[0-9]+}},
 ; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[2:3]
-; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[4:5]
 
-; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s6
-; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s7
+; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s4
+; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s5
 
 ; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_X]], off
 ; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_Y]], off
@@ -135,13 +138,14 @@ define amdgpu_kernel void @queue_ptr() {
 
 ; GCN: .amdhsa_kernel all_inputs
 ; WORKAROUND: .amdhsa_user_sgpr_count 13
-; NOWORKAROUND: .amdhsa_user_sgpr_count 8
+; NOWORKAROUND: .amdhsa_user_sgpr_count 6
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
+; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
 ; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
 ; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 1
 ; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 ; GCN-NEXT: .amdhsa_wavefront_size32
+; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
 ; GCN-NEXT: .amdhsa_enable_private_segment 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
@@ -149,7 +153,7 @@ define amdgpu_kernel void @queue_ptr() {
 ; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
 ; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
 ; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 13
-; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 8
+; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 6
 define amdgpu_kernel void @all_inputs() {
   %alloca = alloca i32, addrspace(5)
   store volatile i32 0, ptr addrspace(5) %alloca
@@ -188,3 +192,6 @@ declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
 declare i64 @llvm.amdgcn.dispatch.id() #0
 
 attributes #0 = { nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
index 7479fc87e31883..2672c12ecf1fc5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
@@ -1,15 +1,16 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIT %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIH %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
 ; GCN-LABEL: {{^}}is_local_vgpr:
 ; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
-; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
+; CI-DAG: s_load_dwordx2 s[0:1], s[4:5], 0x0
 
 ; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
 ; GFX9: v_cmp_eq_u32_e32 vcc, s[[HI]], v[[PTR_HI]]
 
-; CI: v_cmp_eq_u32_e32 vcc, [[APERTURE]], v[[PTR_HI]]
+; CIT: v_cmp_eq_u32_e32 vcc, s4, v[[PTR_HI]]
+; CIH: v_cmp_eq_u32_e32 vcc, s2, v[[PTR_HI]]
 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
 define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -25,15 +26,15 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
 ; select and vcc branch.
 
 ; GCN-LABEL: {{^}}is_local_sgpr:
-; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
+; CI-DAG: s_load_dword s0, s[4:5], 0x1
 
-; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x1{{$}}
+; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x33{{$}}
 ; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x4{{$}}
 
 ; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
 ; GFX9: s_cmp_eq_u32 [[PTR_HI]], s[[HI]]
 
-; CI: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]
+; CI: s_cmp_eq_u32 s0, [[PTR_HI]]
 ; GCN: s_cbranch_vccnz
 define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
   %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
@@ -51,3 +52,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0
 
 attributes #0 = { nounwind readnone speculatable }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
index ec83d7f313d6af..a8bb36bc0d19e8 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
@@ -77,7 +77,7 @@ declare i32 @foo(ptr addrspace(5)) #0
 ; ASM: buffer_store_dword
 ; ASM: buffer_store_dword
 ; ASM: s_swappc_b64
-; ASM: ScratchSize: 16400
+; ASM: ScratchSize: 16
 define amdgpu_kernel void @call_private(ptr addrspace(1) %out, i32 %in) #0 {
 entry:
   %tmp = alloca [2 x i32], addrspace(5)
@@ -94,3 +94,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" }
 attributes #1 = { nounwind readnone }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
index ecdc3845efc4b7..7d7917e0b20cfe 100644
--- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
@@ -24,7 +24,7 @@ define amdgpu_kernel void @volatile_load_group_size_x(ptr addrspace(1) %out) #0
 }
 
 ; CHECK-LABEL: @load_group_s...
[truncated]

Update AMDGPU CodeGen lit tests to check for COV5 ABI.
@saiislam saiislam merged commit 4f7c402 into llvm:main Jan 9, 2024
@saiislam saiislam deleted the cov5_lit5 branch January 9, 2024 12:01
justinfargnoli pushed a commit to justinfargnoli/llvm-project that referenced this pull request Jan 28, 2024
Update AMDGPU CodeGen lit tests to check for COV5 ABI.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants