Skip to content

Commit f2eadcb

Browse files
committed
Try to use CallGraph less badly
1 parent ae82ec9 commit f2eadcb

File tree

2 files changed

+22
-14
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,9 @@ class AMDGPULowerModuleLDS {
10371037
if (!Tmp.back())
10381038
return;
10391039

1040+
SmallPtrSet<Function *, 8> Visited;
1041+
bool SeenUnknownCall = false;
1042+
10401043
do {
10411044
Function *F = Tmp.pop_back_val();
10421045

@@ -1046,19 +1049,25 @@ class AMDGPULowerModuleLDS {
10461049

10471050
Function *Callee = N.second->getFunction();
10481051
if (!Callee) {
1049-
// If we see any indirect calls, assume nothing about potential
1050-
// targets.
1051-
// TODO: This could be refined to possible LDS global users.
1052-
for (auto &N : *CG.getCallsExternalNode()) {
1053-
Function *PotentialCallee = N.second->getFunction();
1054-
Tmp.push_back(PotentialCallee);
1055-
}
1052+
if (!SeenUnknownCall) {
1053+
SeenUnknownCall = true;
1054+
1055+
// If we see any indirect calls, assume nothing about potential
1056+
// targets.
1057+
// TODO: This could be refined to possible LDS global users.
1058+
for (auto &N : *CG.getExternalCallingNode()) {
1059+
Function *PotentialCallee = N.second->getFunction();
1060+
if (!isKernelLDS(PotentialCallee))
1061+
PotentialCallee->removeFnAttr("amdgpu-no-lds-kernel-id");
1062+
}
10561063

1057-
continue;
1064+
continue;
1065+
}
10581066
}
10591067

10601068
Callee->removeFnAttr("amdgpu-no-lds-kernel-id");
1061-
Tmp.push_back(Callee);
1069+
if (Visited.insert(Callee).second)
1070+
Tmp.push_back(Callee);
10621071
}
10631072
} while (!Tmp.empty());
10641073
}

llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ define amdgpu_kernel void @kernel_lds() {
148148

149149
define internal i16 @mutual_recursion_0(i16 %arg) {
150150
; CHECK-LABEL: define internal i16 @mutual_recursion_0(
151-
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR5:[0-9]+]] {
151+
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] {
152152
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
153153
; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
154154
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[RECURSIVE_KERNEL_LDS]], align 4
@@ -168,7 +168,7 @@ define internal i16 @mutual_recursion_0(i16 %arg) {
168168

169169
define internal void @mutual_recursion_1(i16 %arg) {
170170
; CHECK-LABEL: define internal void @mutual_recursion_1(
171-
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR5]] {
171+
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] {
172172
; CHECK-NEXT: call void @mutual_recursion_0(i16 [[ARG]])
173173
; CHECK-NEXT: ret void
174174
;
@@ -193,9 +193,8 @@ define amdgpu_kernel void @kernel_lds_recursion() {
193193
; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
194194
; CHECK: attributes #[[ATTR3]] = { "amdgpu-lds-size"="4" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
195195
; CHECK: attributes #[[ATTR4]] = { "amdgpu-lds-size"="2" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
196-
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
197-
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
198-
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
196+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
197+
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
199198
;.
200199
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 1}
201200
; CHECK: [[META1:![0-9]+]] = !{i32 0}

0 commit comments

Comments
 (0)