Skip to content

[AMDGPU][NFC] Update left over tests for COV5 #76984

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions llvm/test/CodeGen/AMDGPU/attributor-noopt.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=NOOPT %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefix=OPT %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=NOOPT,COV4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=NOOPT,COV5 %s

; Check that AMDGPUAttributor is not run with -O0.
; OPT: .amdhsa_user_sgpr_private_segment_buffer 1
Expand All @@ -18,7 +19,8 @@

; NOOPT: .amdhsa_user_sgpr_private_segment_buffer 1
; NOOPT: .amdhsa_user_sgpr_dispatch_ptr 1
; NOOPT: .amdhsa_user_sgpr_queue_ptr 1
; COV4: .amdhsa_user_sgpr_queue_ptr 1
; COV5: .amdhsa_user_sgpr_queue_ptr 0
; NOOPT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; NOOPT: .amdhsa_user_sgpr_dispatch_id 1
; NOOPT: .amdhsa_user_sgpr_flat_scratch_init 0
Expand All @@ -32,3 +34,6 @@
define amdgpu_kernel void @foo() {
ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
9 changes: 6 additions & 3 deletions llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@

; ALL-LABEL: {{^}}kernel:
; GFX908: .amdhsa_next_free_vgpr 32
; GFX908-NEXT: .amdhsa_next_free_sgpr 36
; GFX908-NEXT: .amdhsa_next_free_sgpr 33

; GFX90A: .amdhsa_next_free_vgpr 65
; GFX90A-NEXT: .amdhsa_next_free_sgpr 36
; GFX90A: .amdhsa_next_free_vgpr 59
; GFX90A-NEXT: .amdhsa_next_free_sgpr 33
; GFX90A-NEXT: .amdhsa_accum_offset 32
define amdgpu_kernel void @kernel() #0 {
bb:
Expand All @@ -29,3 +29,6 @@ bb:
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn }
attributes #2 = { nounwind readnone willreturn }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
5 changes: 4 additions & 1 deletion llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

; CHECK-LABEL: {{^}}kernel0:
; CHECK: .amdhsa_next_free_vgpr 53
; CHECK-NEXT: .amdhsa_next_free_sgpr 36
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
define amdgpu_kernel void @kernel0() #0 {
bb:
call void @alias0() #2
Expand All @@ -24,3 +24,6 @@ bb:
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn }
attributes #2 = { nounwind readnone willreturn }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
5 changes: 4 additions & 1 deletion llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

; CHECK-LABEL: {{^}}kernel1:
; CHECK: .amdhsa_next_free_vgpr 41
; CHECK-NEXT: .amdhsa_next_free_sgpr 36
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
define amdgpu_kernel void @kernel1() #0 {
bb:
call void asm sideeffect "; clobber v40 ", "~{v40}"()
Expand All @@ -27,3 +27,6 @@ bb:
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="8,10" }
attributes #2 = { nounwind readnone willreturn }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
5 changes: 4 additions & 1 deletion llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

; CHECK-LABEL: {{^}}kernel2:
; CHECK: .amdhsa_next_free_vgpr 53
; CHECK-NEXT: .amdhsa_next_free_sgpr 36
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
define amdgpu_kernel void @kernel2() #0 {
bb:
call void @alias2() #2
Expand All @@ -24,3 +24,6 @@ bb:
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="4,10" }
attributes #2 = { nounwind readnone willreturn }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
5 changes: 4 additions & 1 deletion llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

; CHECK-LABEL: {{^}}kernel3:
; CHECK: .amdhsa_next_free_vgpr 253
; CHECK-NEXT: .amdhsa_next_free_sgpr 36
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
define amdgpu_kernel void @kernel3() #0 {
bb:
call void @alias3() #2
Expand All @@ -24,3 +24,6 @@ bb:
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,1" }
attributes #2 = { nounwind readnone willreturn }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
9 changes: 5 additions & 4 deletions llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ define i32 @divergent_lshr_and_cmp(i32 %x) {
entry:
%0 = and i32 %x, 2
%1 = icmp ne i32 %0, 0
; Prevent removal of truncate in SDag by inserting llvm.amdgcn.if
br i1 %1, label %out.true, label %out.else

out.true:
Expand All @@ -43,9 +42,9 @@ define amdgpu_kernel void @uniform_opt_lshr_and_cmp(ptr addrspace(1) %out, i32 %
; GCN-LABEL: name: uniform_opt_lshr_and_cmp
; GCN: bb.0.entry:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: liveins: $sgpr4_sgpr5
; GCN-NEXT: liveins: $sgpr2_sgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset, addrspace 4)
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
Expand Down Expand Up @@ -84,7 +83,6 @@ define amdgpu_kernel void @uniform_opt_lshr_and_cmp(ptr addrspace(1) %out, i32 %
entry:
%0 = and i32 %x, 2
%1 = icmp ne i32 %0, 0
; Don't optimize the truncate in the SDag away.
br i1 %1, label %out.true, label %out.else

out.true:
Expand All @@ -96,3 +94,6 @@ out.else:
store i1 %1, ptr addrspace(1) %out
ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
5 changes: 4 additions & 1 deletion llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ define amdgpu_kernel void @fneg_fabsf_free_f32(ptr addrspace(1) %out, i32 %in) {
; R600: |PV.{{[XYZW]}}|
; R600: -PV

; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI: s_load_dwordx2 s[0:1], s[2:3], 0x9
define amdgpu_kernel void @fneg_fabsf_fn_free_f32(ptr addrspace(1) %out, i32 %in) {
%bc = bitcast i32 %in to float
%fabs = call float @fabsf(float %bc)
Expand Down Expand Up @@ -109,3 +109,6 @@ declare float @fabsf(float) readnone
declare float @llvm.fabs.f32(float) readnone
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
31 changes: 19 additions & 12 deletions llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GCN-NEXT: .amdhsa_wavefront_size32
; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
; GCN-NEXT: .amdhsa_enable_private_segment 0
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
Expand Down Expand Up @@ -64,6 +65,7 @@ define amdgpu_kernel void @minimal_kernel_inputs() {
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GCN-NEXT: .amdhsa_wavefront_size32
; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
; GCN-NEXT: .amdhsa_enable_private_segment 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
Expand All @@ -81,7 +83,7 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() {
}

; GCN-LABEL: {{^}}queue_ptr:
; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1]
; GCN: global_load_u8 v{{[0-9]+}},

; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15
; NOWORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s2
Expand All @@ -91,11 +93,12 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() {
; WORKAROUND: .amdhsa_user_sgpr_count 15
; NOWORKAROUND: .amdhsa_user_sgpr_count 2
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GCN-NEXT: .amdhsa_wavefront_size32
; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
; GCN-NEXT: .amdhsa_enable_private_segment 0
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
Expand All @@ -117,16 +120,16 @@ define amdgpu_kernel void @queue_ptr() {
; WORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s14
; WORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s15

; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s8
; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s9
; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s10
; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s6
; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s7
; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s8

; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1]
; GCN: global_load_u8 v{{[0-9]+}},
; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[2:3]
; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[4:5]

; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s6
; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s7
; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s4
; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s5

; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_X]], off
; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_Y]], off
Expand All @@ -135,21 +138,22 @@ define amdgpu_kernel void @queue_ptr() {

; GCN: .amdhsa_kernel all_inputs
; WORKAROUND: .amdhsa_user_sgpr_count 13
; NOWORKAROUND: .amdhsa_user_sgpr_count 8
; NOWORKAROUND: .amdhsa_user_sgpr_count 6
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 1
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GCN-NEXT: .amdhsa_wavefront_size32
; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
; GCN-NEXT: .amdhsa_enable_private_segment 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 13
; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 8
; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 6
define amdgpu_kernel void @all_inputs() {
%alloca = alloca i32, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
Expand Down Expand Up @@ -188,3 +192,6 @@ declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0

attributes #0 = { nounwind readnone speculatable willreturn }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
18 changes: 11 additions & 7 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIH %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; GCN-LABEL: {{^}}is_local_vgpr:
; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
; CI-DAG: s_load_dwordx2 s[0:1], s[4:5], 0x0

; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
; GFX9: v_cmp_eq_u32_e32 vcc, s[[HI]], v[[PTR_HI]]

; CI: v_cmp_eq_u32_e32 vcc, [[APERTURE]], v[[PTR_HI]]
; CIT: v_cmp_eq_u32_e32 vcc, s4, v[[PTR_HI]]
; CIH: v_cmp_eq_u32_e32 vcc, s2, v[[PTR_HI]]
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
%id = call i32 @llvm.amdgcn.workitem.id.x()
Expand All @@ -25,15 +26,15 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
; select and vcc branch.

; GCN-LABEL: {{^}}is_local_sgpr:
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
; CI-DAG: s_load_dword s0, s[4:5], 0x1

; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x1{{$}}
; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x33{{$}}
; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x4{{$}}

; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
; GFX9: s_cmp_eq_u32 [[PTR_HI]], s[[HI]]

; CI: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]
; CI: s_cmp_eq_u32 s0, [[PTR_HI]]
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
%val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
Expand All @@ -51,3 +52,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0

attributes #0 = { nounwind readnone speculatable }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
5 changes: 4 additions & 1 deletion llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ declare i32 @foo(ptr addrspace(5)) #0
; ASM: buffer_store_dword
; ASM: buffer_store_dword
; ASM: s_swappc_b64
; ASM: ScratchSize: 16400
; ASM: ScratchSize: 16
define amdgpu_kernel void @call_private(ptr addrspace(1) %out, i32 %in) #0 {
entry:
%tmp = alloca [2 x i32], addrspace(5)
Expand All @@ -94,3 +94,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" }
attributes #1 = { nounwind readnone }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
Loading