Skip to content

Commit 9b743f5

Browse files
committed
[WIP][AMDGPU] Enable AAAddressSpace in AMDGPUAttributor
1 parent 255a049 commit 9b743f5

File tree

3 files changed: 43 additions and 21 deletions.

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1038,7 +1038,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
10381038
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
10391039
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
10401040
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1041-
&AAUnderlyingObjects::ID});
1041+
&AAUnderlyingObjects::ID, &AAAddressSpace::ID});
10421042

10431043
AttributorConfig AC(CGUpdater);
10441044
AC.Allowed = &Allowed;
@@ -1064,6 +1064,17 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
10641064
} else if (CC == CallingConv::AMDGPU_KERNEL) {
10651065
addPreloadKernArgHint(F, TM);
10661066
}
1067+
1068+
for (auto &I : instructions(F)) {
1069+
if (auto *LI = dyn_cast<LoadInst>(&I)) {
1070+
A.getOrCreateAAFor<AAAddressSpace>(
1071+
IRPosition::value(*LI->getPointerOperand()));
1072+
}
1073+
if (auto *SI = dyn_cast<StoreInst>(&I)) {
1074+
A.getOrCreateAAFor<AAAddressSpace>(
1075+
IRPosition::value(*SI->getPointerOperand()));
1076+
}
1077+
}
10671078
}
10681079

10691080
ChangeStatus Change = A.run();

llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll

Lines changed: 22 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -425,8 +425,7 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr
425425
;
426426
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
427427
; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
428-
; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
429-
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
428+
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4
430429
; ATTRIBUTOR_HSA-NEXT: ret void
431430
;
432431
%stof = addrspacecast ptr addrspace(3) %ptr to ptr
@@ -443,8 +442,7 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %p
443442
;
444443
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
445444
; ATTRIBUTOR_HSA-SAME: (ptr addrspace(5) [[PTR:%.*]]) #[[ATTR12]] {
446-
; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
447-
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
445+
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4
448446
; ATTRIBUTOR_HSA-NEXT: ret void
449447
;
450448
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
@@ -478,23 +476,33 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #1 {
478476

479477
; No-op addrspacecast should not use queue ptr
480478
define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #1 {
481-
; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
482-
; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
483-
; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
484-
; HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
485-
; HSA-NEXT: ret void
479+
; AKF_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
480+
; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
481+
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
482+
; AKF_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
483+
; AKF_HSA-NEXT: ret void
484+
;
485+
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
486+
; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
487+
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4
488+
; ATTRIBUTOR_HSA-NEXT: ret void
486489
;
487490
%stof = addrspacecast ptr addrspace(1) %ptr to ptr
488491
store volatile i32 0, ptr %stof
489492
ret void
490493
}
491494

492495
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #1 {
493-
; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
494-
; HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
495-
; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
496-
; HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4
497-
; HSA-NEXT: ret void
496+
; AKF_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
497+
; AKF_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
498+
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
499+
; AKF_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4
500+
; AKF_HSA-NEXT: ret void
501+
;
502+
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
503+
; ATTRIBUTOR_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
504+
; ATTRIBUTOR_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr addrspace(4) [[PTR]], align 4
505+
; ATTRIBUTOR_HSA-NEXT: ret void
498506
;
499507
%stof = addrspacecast ptr addrspace(4) %ptr to ptr
500508
%ld = load volatile i32, ptr %stof

llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -34,9 +34,8 @@ define amdgpu_kernel void @test_simple_indirect_call() {
3434
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
3535
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
3636
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
37-
; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
38-
; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8
39-
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
37+
; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
38+
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
4039
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
4140
; ATTRIBUTOR_GCN-NEXT: ret void
4241
;
@@ -75,12 +74,16 @@ define amdgpu_kernel void @test_simple_indirect_call() {
7574
ret void
7675
}
7776

77+
78+
!llvm.module.flags = !{!0}
79+
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
7880
;.
7981
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
8082
;.
8183
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
8284
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
8385
;.
84-
85-
!llvm.module.flags = !{!0}
86-
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
86+
; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
87+
;.
88+
; ATTRIBUTOR_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
89+
;.

0 commit comments

Comments
 (0)