Skip to content

Commit 4f7c402

Browse files
authored
[AMDGPU][NFC] Update leftover tests for COV5 (#76984)
Update the remaining AMDGPU CodeGen lit tests to check for the COV5 (code object version 5) ABI.
1 parent 0b9b00c commit 4f7c402

13 files changed

+106
-62
lines changed

llvm/test/CodeGen/AMDGPU/attributor-noopt.ll

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=OPT %s
2-
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=NOOPT %s
1+
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefix=OPT %s
2+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=NOOPT,COV4 %s
3+
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=NOOPT,COV5 %s
34

45
; Check that AMDGPUAttributor is not run with -O0.
56
; OPT: .amdhsa_user_sgpr_private_segment_buffer 1
@@ -18,7 +19,8 @@
1819

1920
; NOOPT: .amdhsa_user_sgpr_private_segment_buffer 1
2021
; NOOPT: .amdhsa_user_sgpr_dispatch_ptr 1
21-
; NOOPT: .amdhsa_user_sgpr_queue_ptr 1
22+
; COV4: .amdhsa_user_sgpr_queue_ptr 1
23+
; COV5: .amdhsa_user_sgpr_queue_ptr 0
2224
; NOOPT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
2325
; NOOPT: .amdhsa_user_sgpr_dispatch_id 1
2426
; NOOPT: .amdhsa_user_sgpr_flat_scratch_init 0
@@ -32,3 +34,6 @@
3234
define amdgpu_kernel void @foo() {
3335
ret void
3436
}
37+
38+
!llvm.module.flags = !{!0}
39+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}

llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99

1010
; ALL-LABEL: {{^}}kernel:
1111
; GFX908: .amdhsa_next_free_vgpr 32
12-
; GFX908-NEXT: .amdhsa_next_free_sgpr 36
12+
; GFX908-NEXT: .amdhsa_next_free_sgpr 33
1313

14-
; GFX90A: .amdhsa_next_free_vgpr 65
15-
; GFX90A-NEXT: .amdhsa_next_free_sgpr 36
14+
; GFX90A: .amdhsa_next_free_vgpr 59
15+
; GFX90A-NEXT: .amdhsa_next_free_sgpr 33
1616
; GFX90A-NEXT: .amdhsa_accum_offset 32
1717
define amdgpu_kernel void @kernel() #0 {
1818
bb:
@@ -29,3 +29,6 @@ bb:
2929
attributes #0 = { noinline norecurse nounwind optnone }
3030
attributes #1 = { noinline norecurse nounwind readnone willreturn }
3131
attributes #2 = { nounwind readnone willreturn }
32+
33+
!llvm.module.flags = !{!0}
34+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
; CHECK-LABEL: {{^}}kernel0:
1010
; CHECK: .amdhsa_next_free_vgpr 53
11-
; CHECK-NEXT: .amdhsa_next_free_sgpr 36
11+
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
1212
define amdgpu_kernel void @kernel0() #0 {
1313
bb:
1414
call void @alias0() #2
@@ -24,3 +24,6 @@ bb:
2424
attributes #0 = { noinline norecurse nounwind optnone }
2525
attributes #1 = { noinline norecurse nounwind readnone willreturn }
2626
attributes #2 = { nounwind readnone willreturn }
27+
28+
!llvm.module.flags = !{!0}
29+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
; CHECK-LABEL: {{^}}kernel1:
1212
; CHECK: .amdhsa_next_free_vgpr 41
13-
; CHECK-NEXT: .amdhsa_next_free_sgpr 36
13+
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
1414
define amdgpu_kernel void @kernel1() #0 {
1515
bb:
1616
call void asm sideeffect "; clobber v40 ", "~{v40}"()
@@ -27,3 +27,6 @@ bb:
2727
attributes #0 = { noinline norecurse nounwind optnone }
2828
attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="8,10" }
2929
attributes #2 = { nounwind readnone willreturn }
30+
31+
!llvm.module.flags = !{!0}
32+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
; CHECK-LABEL: {{^}}kernel2:
1010
; CHECK: .amdhsa_next_free_vgpr 53
11-
; CHECK-NEXT: .amdhsa_next_free_sgpr 36
11+
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
1212
define amdgpu_kernel void @kernel2() #0 {
1313
bb:
1414
call void @alias2() #2
@@ -24,3 +24,6 @@ bb:
2424
attributes #0 = { noinline norecurse nounwind optnone }
2525
attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-waves-per-eu"="4,10" }
2626
attributes #2 = { nounwind readnone willreturn }
27+
28+
!llvm.module.flags = !{!0}
29+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
; CHECK-LABEL: {{^}}kernel3:
1010
; CHECK: .amdhsa_next_free_vgpr 253
11-
; CHECK-NEXT: .amdhsa_next_free_sgpr 36
11+
; CHECK-NEXT: .amdhsa_next_free_sgpr 33
1212
define amdgpu_kernel void @kernel3() #0 {
1313
bb:
1414
call void @alias3() #2
@@ -24,3 +24,6 @@ bb:
2424
attributes #0 = { noinline norecurse nounwind optnone }
2525
attributes #1 = { noinline norecurse nounwind readnone willreturn "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,1" }
2626
attributes #2 = { nounwind readnone willreturn }
27+
28+
!llvm.module.flags = !{!0}
29+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ define i32 @divergent_lshr_and_cmp(i32 %x) {
2828
entry:
2929
%0 = and i32 %x, 2
3030
%1 = icmp ne i32 %0, 0
31-
; Prevent removal of truncate in SDag by inserting llvm.amdgcn.if
3231
br i1 %1, label %out.true, label %out.else
3332

3433
out.true:
@@ -43,9 +42,9 @@ define amdgpu_kernel void @uniform_opt_lshr_and_cmp(ptr addrspace(1) %out, i32 %
4342
; GCN-LABEL: name: uniform_opt_lshr_and_cmp
4443
; GCN: bb.0.entry:
4544
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
46-
; GCN-NEXT: liveins: $sgpr4_sgpr5
45+
; GCN-NEXT: liveins: $sgpr2_sgpr3
4746
; GCN-NEXT: {{ $}}
48-
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
47+
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr2_sgpr3
4948
; GCN-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
5049
; GCN-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 11, 0 :: (dereferenceable invariant load (s32) from %ir.x.kernarg.offset, addrspace 4)
5150
; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
@@ -84,7 +83,6 @@ define amdgpu_kernel void @uniform_opt_lshr_and_cmp(ptr addrspace(1) %out, i32 %
8483
entry:
8584
%0 = and i32 %x, 2
8685
%1 = icmp ne i32 %0, 0
87-
; Don't optimize the truncate in the SDag away.
8886
br i1 %1, label %out.true, label %out.else
8987

9088
out.true:
@@ -96,3 +94,6 @@ out.else:
9694
store i1 %1, ptr addrspace(1) %out
9795
ret void
9896
}
97+
98+
!llvm.module.flags = !{!0}
99+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

llvm/test/CodeGen/AMDGPU/fneg-fabs.ll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ define amdgpu_kernel void @fneg_fabsf_free_f32(ptr addrspace(1) %out, i32 %in) {
4949
; R600: |PV.{{[XYZW]}}|
5050
; R600: -PV
5151

52-
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
52+
; SI: s_load_dwordx2 s[0:1], s[2:3], 0x9
5353
define amdgpu_kernel void @fneg_fabsf_fn_free_f32(ptr addrspace(1) %out, i32 %in) {
5454
%bc = bitcast i32 %in to float
5555
%fabs = call float @fabsf(float %bc)
@@ -109,3 +109,6 @@ declare float @fabsf(float) readnone
109109
declare float @llvm.fabs.f32(float) readnone
110110
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
111111
declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone
112+
113+
!llvm.module.flags = !{!0}
114+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
3737
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
3838
; GCN-NEXT: .amdhsa_wavefront_size32
39+
; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
3940
; GCN-NEXT: .amdhsa_enable_private_segment 0
4041
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
4142
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -64,6 +65,7 @@ define amdgpu_kernel void @minimal_kernel_inputs() {
6465
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
6566
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
6667
; GCN-NEXT: .amdhsa_wavefront_size32
68+
; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
6769
; GCN-NEXT: .amdhsa_enable_private_segment 1
6870
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
6971
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -81,7 +83,7 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() {
8183
}
8284

8385
; GCN-LABEL: {{^}}queue_ptr:
84-
; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1]
86+
; GCN: global_load_u8 v{{[0-9]+}},
8587

8688
; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15
8789
; NOWORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s2
@@ -91,11 +93,12 @@ define amdgpu_kernel void @minimal_kernel_inputs_with_stack() {
9193
; WORKAROUND: .amdhsa_user_sgpr_count 15
9294
; NOWORKAROUND: .amdhsa_user_sgpr_count 2
9395
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
94-
; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
95-
; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
96+
; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
97+
; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
9698
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
9799
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
98100
; GCN-NEXT: .amdhsa_wavefront_size32
101+
; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
99102
; GCN-NEXT: .amdhsa_enable_private_segment 0
100103
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
101104
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
@@ -117,16 +120,16 @@ define amdgpu_kernel void @queue_ptr() {
117120
; WORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s14
118121
; WORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s15
119122

120-
; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s8
121-
; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s9
122-
; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s10
123+
; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s6
124+
; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s7
125+
; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s8
123126

124127
; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1]
128+
; GCN: global_load_u8 v{{[0-9]+}},
125129
; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[2:3]
126-
; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[4:5]
127130

128-
; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s6
129-
; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s7
131+
; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s4
132+
; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s5
130133

131134
; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_X]], off
132135
; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_Y]], off
@@ -135,21 +138,22 @@ define amdgpu_kernel void @queue_ptr() {
135138

136139
; GCN: .amdhsa_kernel all_inputs
137140
; WORKAROUND: .amdhsa_user_sgpr_count 13
138-
; NOWORKAROUND: .amdhsa_user_sgpr_count 8
141+
; NOWORKAROUND: .amdhsa_user_sgpr_count 6
139142
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
140-
; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
143+
; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
141144
; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
142145
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 1
143146
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
144147
; GCN-NEXT: .amdhsa_wavefront_size32
148+
; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
145149
; GCN-NEXT: .amdhsa_enable_private_segment 1
146150
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
147151
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
148152
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
149153
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
150154
; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
151155
; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 13
152-
; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 8
156+
; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 6
153157
define amdgpu_kernel void @all_inputs() {
154158
%alloca = alloca i32, addrspace(5)
155159
store volatile i32 0, ptr addrspace(5) %alloca
@@ -188,3 +192,6 @@ declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
188192
declare i64 @llvm.amdgcn.dispatch.id() #0
189193

190194
attributes #0 = { nounwind readnone speculatable willreturn }
195+
196+
!llvm.module.flags = !{!0}
197+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
1-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
2-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
1+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIT %s
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIH %s
33
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
44

55
; GCN-LABEL: {{^}}is_local_vgpr:
66
; GCN-DAG: {{flat|global|buffer}}_load_dwordx2 v{{\[[0-9]+}}:[[PTR_HI:[0-9]+]]]
7-
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
7+
; CI-DAG: s_load_dwordx2 s[0:1], s[4:5], 0x0
88

99
; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
1010
; GFX9: v_cmp_eq_u32_e32 vcc, s[[HI]], v[[PTR_HI]]
1111

12-
; CI: v_cmp_eq_u32_e32 vcc, [[APERTURE]], v[[PTR_HI]]
12+
; CIT: v_cmp_eq_u32_e32 vcc, s4, v[[PTR_HI]]
13+
; CIH: v_cmp_eq_u32_e32 vcc, s2, v[[PTR_HI]]
1314
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
1415
define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
1516
%id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -25,15 +26,15 @@ define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
2526
; select and vcc branch.
2627

2728
; GCN-LABEL: {{^}}is_local_sgpr:
28-
; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
29+
; CI-DAG: s_load_dword s0, s[4:5], 0x1
2930

30-
; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[6:7], 0x1{{$}}
31+
; CI-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x33{{$}}
3132
; GFX9-DAG: s_load_dword [[PTR_HI:s[0-9]+]], s[4:5], 0x4{{$}}
3233

3334
; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
3435
; GFX9: s_cmp_eq_u32 [[PTR_HI]], s[[HI]]
3536

36-
; CI: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]]
37+
; CI: s_cmp_eq_u32 s0, [[PTR_HI]]
3738
; GCN: s_cbranch_vccnz
3839
define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
3940
%val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
@@ -51,3 +52,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
5152
declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0
5253

5354
attributes #0 = { nounwind readnone speculatable }
55+
56+
!llvm.module.flags = !{!0}
57+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ declare i32 @foo(ptr addrspace(5)) #0
7777
; ASM: buffer_store_dword
7878
; ASM: buffer_store_dword
7979
; ASM: s_swappc_b64
80-
; ASM: ScratchSize: 16400
80+
; ASM: ScratchSize: 16
8181
define amdgpu_kernel void @call_private(ptr addrspace(1) %out, i32 %in) #0 {
8282
entry:
8383
%tmp = alloca [2 x i32], addrspace(5)
@@ -94,3 +94,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
9494

9595
attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" }
9696
attributes #1 = { nounwind readnone }
97+
98+
!llvm.module.flags = !{!0}
99+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

0 commit comments

Comments
 (0)