
Commit a0afcbf

[AMDGPU] Enable AAAddressSpace in AMDGPUAttributor (#101593)
1 parent: 2e9f15e

6 files changed (+96, -113 lines)


llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 12 additions & 1 deletion
@@ -1038,7 +1038,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
        &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
        &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
        &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
-       &AAUnderlyingObjects::ID});
+       &AAUnderlyingObjects::ID, &AAAddressSpace::ID});
 
   AttributorConfig AC(CGUpdater);
   AC.Allowed = &Allowed;
@@ -1064,6 +1064,17 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
     } else if (CC == CallingConv::AMDGPU_KERNEL) {
       addPreloadKernArgHint(F, TM);
     }
+
+    for (auto &I : instructions(F)) {
+      if (auto *LI = dyn_cast<LoadInst>(&I)) {
+        A.getOrCreateAAFor<AAAddressSpace>(
+            IRPosition::value(*LI->getPointerOperand()));
+      }
+      if (auto *SI = dyn_cast<StoreInst>(&I)) {
+        A.getOrCreateAAFor<AAAddressSpace>(
+            IRPosition::value(*SI->getPointerOperand()));
+      }
+    }
   }
 
   ChangeStatus Change = A.run();
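With AAAddressSpace in the allowed set and seeded on every load and store pointer operand, the attributor can rewrite a flat access whose pointer provably originates in a specific address space so that the access uses that address space directly; the test updates below all follow from this. A minimal LLVM IR sketch of the rewrite (illustrative names, not taken from the tests):

; Input: the kernel stores through a flat pointer obtained from an LDS pointer.
define amdgpu_kernel void @example(ptr addrspace(3) %lds.ptr) {
  %flat = addrspacecast ptr addrspace(3) %lds.ptr to ptr
  store volatile i32 7, ptr %flat
  ret void
}

; After the attributor runs, AAAddressSpace has proven that %flat always points
; into addrspace(3), so the store uses the original pointer and the cast becomes
; dead; codegen can then select ds_write_b32 instead of flat_store_dword.
define amdgpu_kernel void @example(ptr addrspace(3) %lds.ptr) {
  store volatile i32 7, ptr addrspace(3) %lds.ptr, align 4
  ret void
}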

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll

Lines changed: 4 additions & 3 deletions
@@ -7,10 +7,11 @@ target triple = "amdgcn-amd-amdhsa"
 ; Make sure flat_scratch_init is set
 
 ; GCN-LABEL: {{^}}stack_object_addrspacecast_in_kernel_no_calls:
-; RW-FLAT: s_add_u32 flat_scratch_lo, s4, s7
-; RW-FLAT: s_addc_u32 flat_scratch_hi, s5, 0
+; RW-FLAT: s_add_u32 s0, s0, s7
+; RW-FLAT: s_addc_u32 s1, s1, 0
 ; RO-FLAT-NOT: flat_scratch
-; GCN: flat_store_dword
+; RW-FLAT: buffer_store_dword
+; RO-FLAT: scratch_store_dword
 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
 ; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 1
 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
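The updated checks reflect that the cast of the stack object back to a flat pointer is now folded by the attributor, so the store is selected as a private (buffer/scratch) access rather than a flat one. A rough sketch of the pattern the test name suggests (assumed for illustration, not copied from the test file):

define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
  %alloca = alloca i32, align 4, addrspace(5)            ; stack object in private memory
  %flat = addrspacecast ptr addrspace(5) %alloca to ptr  ; cast to a flat pointer
  store volatile i32 0, ptr %flat                        ; previously matched flat_store_dword
  ret void
}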

llvm/test/CodeGen/AMDGPU/addrspacecast.ll

Lines changed: 47 additions & 81 deletions
@@ -5,22 +5,11 @@ target triple = "amdgcn-amd-amdhsa"
 
 ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
 
-; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
-; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
-; CI-DAG: s_cmp_lg_u32 [[PTR]], -1
-; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
-; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
-
-; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base
-
+; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[6:7], 0x0{{$}}
+; GFX9-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x0{{$}}
+; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], [[APERTURE]]
 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
-
-; GFX9: s_cmp_lg_u32 [[PTR]], -1
-; GFX9-DAG: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
-; GFX9-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0
-
-; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
+; HSA-DAG: ds_write_b32 [[PTR]], [[K]]
 
 ; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
 ; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
@@ -39,22 +28,8 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr
 
 ; Test handling inside a non-kernel
 ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast_func:
-; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[6:7], 0x10{{$}}
-; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
-; CI-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
-; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
-; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0
-
-; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base
-
 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-
-; GFX9-DAG: v_mov_b32_e32 v[[VREG_HIBASE:[0-9]+]], s[[HIBASE]]
-; GFX9-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
-; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0, vcc
-; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, v[[VREG_HIBASE]], vcc
-
-; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
+; HSA-DAG: ds_write_b32 v0, [[K]]
 define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
   %stof = addrspacecast ptr addrspace(3) %ptr to ptr
   store volatile i32 7, ptr %stof
@@ -63,23 +38,16 @@ define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
 
 ; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
 
-; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
-; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
-
-; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; CI-DAG: s_cmp_lg_u32 [[PTR]], -1
-; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
-; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
-
-; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
-; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_private_base
-
-; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; GFX9: s_cmp_lg_u32 [[PTR]], -1
-; GFX9: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
-; GFX9: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0
-
-; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
+; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[6:7], 0x0{{$}}
+; GFX9-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x0{{$}}
+; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], [[APERTURE]]
+; HSA-DAG: s_mov_b64 s[{{[0-9]+}}:[[RSRCHI:[0-9]+]]], s[2:3]
+; HSA-DAG: s_mov_b64 s[[[BASELO:[0-9]+]]:[[BASEHI:[0-9]+]]], s[0:1]
+; SI-DAG: s_add_u32 s[[BASELO]], s[[BASELO]], s9
+; GFX9-DAG: s_add_u32 s[[BASELO]], s[[BASELO]], s7
+; HSA-DAG: s_addc_u32 s[[BASEHI]], s[[BASEHI]], 0
+; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
+; HSA: buffer_store_dword [[K]], [[PTR]], s[[[BASELO]]:[[RSRCHI]]], 0 offen
 
 ; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
 ; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
@@ -97,10 +65,12 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %p
 ; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
 
 ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
-; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
-; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
+; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
+; CI-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
+; CI: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
+; GFX9-DAG: v_mov_b32_e32 [[ADDR:v[0-9]+]], 0
+; GFX9: global_store_dword [[ADDR]], [[K]], s[[[PTRLO]]:[[PTRHI]]]
 
 ; HSA: .amdhsa_user_sgpr_queue_ptr 0
 define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #0 {
@@ -112,9 +82,7 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %pt
 ; no-op
 ; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast:
 ; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
-; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
-; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
-; HSA: flat_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]]
+; HSA-DAG: s_load_dword s0, s[[[PTRLO]]:[[PTRHI]]], 0x0
 define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #0 {
   %stof = addrspacecast ptr addrspace(4) %ptr to ptr
   %ld = load volatile i32, ptr %stof
@@ -215,14 +183,9 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
 }
 
 ; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
-; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
-; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
-
-; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
-
 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
+; HSA: ds_write_b32 v[[LO]], v[[K]]
 define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
   %cast = addrspacecast ptr addrspace(3) null to ptr
   store volatile i32 7, ptr %cast
@@ -240,10 +203,9 @@ define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
 }
 
 ; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
-; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
+; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1
+; HSA: ds_write_b32 v[[LO]], v[[K]]
 define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
   %cast = addrspacecast ptr addrspace(3) inttoptr (i32 -1 to ptr addrspace(3)) to ptr
   store volatile i32 7, ptr %cast
@@ -262,14 +224,13 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
 
 ; FIXME: Shouldn't need to enable queue ptr
 ; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
-; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
-; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
-
-; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
-
-; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
+; HSA-DAG: s_mov_b64 s[{{[0-9]+}}:[[RSRCHI:[0-9]+]]], s[2:3]
+; HSA-DAG: s_mov_b64 s[[[BASELO:[0-9]+]]:[[BASEHI:[0-9]+]]], s[0:1]
+; CI-DAG: s_add_u32 s[[BASELO]], s[[BASELO]], s7
+; GFX9-DAG: s_add_u32 s[[BASELO]], s[[BASELO]], s5
+; HSA-DAG: s_addc_u32 s[[BASEHI]], s[[BASEHI]], 0
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
+; HSA: buffer_store_dword v[[K]], off, s[[[BASELO]]:[[RSRCHI]]], 0
 define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
   %cast = addrspacecast ptr addrspace(5) null to ptr
   store volatile i32 7, ptr %cast
@@ -286,13 +247,16 @@ define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
   ret void
 }
 
-
 ; HSA-LABEL: {{^}}cast_neg1_private_to_flat_addrspacecast:
 
-; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
+; HSA-DAG: s_mov_b64 s[{{[0-9]+}}:[[RSRCHI:[0-9]+]]], s[2:3]
+; HSA-DAG: s_mov_b64 s[[[BASELO:[0-9]+]]:[[BASEHI:[0-9]+]]], s[0:1]
+; CI-DAG: s_add_u32 s[[BASELO]], s[[BASELO]], s7
+; GFX9-DAG: s_add_u32 s[[BASELO]], s[[BASELO]], s5
+; HSA-DAG: s_addc_u32 s[[BASEHI]], s[[BASEHI]], 0
+; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
+; HSA: buffer_store_dword v[[K]], [[PTR]], s[[[BASELO]]:[[RSRCHI]]], 0 offen
 
 ; CI: .amdhsa_user_sgpr_queue_ptr 1
 ; GFX9: .amdhsa_user_sgpr_queue_ptr 0
@@ -342,16 +306,18 @@ end:
 
 ; Check for prologue initializing special SGPRs pointing to scratch.
 ; HSA-LABEL: {{^}}store_flat_scratch:
-; CI-DAG: s_mov_b32 flat_scratch_lo, s9
 ; CI-DAG: s_add_i32 [[ADD:s[0-9]+]], s8, s11
 ; CI-DAG: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
-
-; GFX9: s_add_u32 flat_scratch_lo, s6, s9
-; GFX9: s_addc_u32 flat_scratch_hi, s7, 0
-
-; HSA: {{flat|global}}_store_dword
-; HSA: s_barrier
-; HSA: {{flat|global}}_load_dword
+; HSA: buffer_store_dword
+; HSA: s_barrier
+; HSA: buffer_load_dword [[K:v[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen glc
+; HSA-DAG: s_load_dwordx2
+; CI-DAG: s_mov_b32 flat_scratch_lo, s9
+; CI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s4
+; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s5
+; GFX9-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], 0
+; CI: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
+; GFX9: global_store_dword [[PTR]], [[K]]
 define amdgpu_kernel void @store_flat_scratch(ptr addrspace(1) noalias %out, i32) #0 {
   %alloca = alloca i32, i32 9, align 4, addrspace(5)
   %x = call i32 @llvm.amdgcn.workitem.id.x() #2

llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll

Lines changed: 22 additions & 14 deletions
@@ -425,8 +425,7 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
 ; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
-; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
-; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4
 ; ATTRIBUTOR_HSA-NEXT: ret void
 ;
   %stof = addrspacecast ptr addrspace(3) %ptr to ptr
@@ -443,8 +442,7 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %p
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
 ; ATTRIBUTOR_HSA-SAME: (ptr addrspace(5) [[PTR:%.*]]) #[[ATTR12]] {
-; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4
 ; ATTRIBUTOR_HSA-NEXT: ret void
 ;
   %stof = addrspacecast ptr addrspace(5) %ptr to ptr
@@ -478,23 +476,33 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #1 {
 
 ; No-op addrspacecast should not use queue ptr
 define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #1 {
-; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
-; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
-; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
-; HSA-NEXT: ret void
+; AKF_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; AKF_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; AKF_HSA-NEXT: ret void
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4
+; ATTRIBUTOR_HSA-NEXT: ret void
 ;
   %stof = addrspacecast ptr addrspace(1) %ptr to ptr
   store volatile i32 0, ptr %stof
   ret void
 }
 
 define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #1 {
-; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
-; HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
-; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
-; HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4
-; HSA-NEXT: ret void
+; AKF_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
+; AKF_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
+; AKF_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4
+; AKF_HSA-NEXT: ret void
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
+; ATTRIBUTOR_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr addrspace(4) [[PTR]], align 4
+; ATTRIBUTOR_HSA-NEXT: ret void
 ;
   %stof = addrspacecast ptr addrspace(4) %ptr to ptr
   %ld = load volatile i32, ptr %stof

llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll

Lines changed: 2 additions & 8 deletions
@@ -38,15 +38,9 @@ define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
 }
 
 ; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
-; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[4:5], 0x0
-; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
-; CIVI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0
+; GCN-DAG: ds_write_b32 v[[LO]], v[[LO]] offset:16
 
-; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
-; GFX9-DAG: v_mov_b32_e32 v[[VGPR_HI:[0-9]+]], s[[HI]]
-; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[VGPR_HI]]]
-
-; CIVI: {{flat|global}}_store_dword v[[[LO]]:[[HI]]]
 define hidden void @use_queue_ptr_addrspacecast() #1 {
   %asc = addrspacecast ptr addrspace(3) inttoptr (i32 16 to ptr addrspace(3)) to ptr
   store volatile i32 0, ptr %asc

llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll

Lines changed: 9 additions & 6 deletions
@@ -34,9 +34,8 @@ define amdgpu_kernel void @test_simple_indirect_call() {
 ; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
 ; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
 ; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
-; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8
-; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
 ; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
 ; ATTRIBUTOR_GCN-NEXT: ret void
 ;
@@ -75,12 +74,16 @@ define amdgpu_kernel void @test_simple_indirect_call() {
   ret void
 }
 
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
 ;.
 ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
 ;.
 ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
 ;.
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
+; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+;.
+; ATTRIBUTOR_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+;.
