Skip to content

Commit ae82ec9

Browse files
committed
AMDGPU: Drop amdgpu-no-lds-kernel-id attribute in LDS lowering
This is in preparation for moving the run of AMDGPUAttributor earlier. Currently it infers the lack of the corresponding intrinsic calls, so if we introduce new ones we need to remove the attribute from any possible transitive callers. This is more conservative than necessary; we could try to identify specific subgraphs where LDS globals are not used. Other options include teaching the attributor to avoid adding the attribute in cases where the lowering may choose the table, but this seems more complex. Alternatively, we could add a second run of the attributor, but that doesn't seem worth it.
1 parent b44406a commit ae82ec9

File tree

2 files changed

+51
-7
lines changed

2 files changed

+51
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,42 @@ class AMDGPULowerModuleLDS {
10271027
return N;
10281028
}
10291029

1030+
/// Strip "amdgpu-no-lds-kernel-id" from any functions where we may have
1031+
/// introduced its use. If AMDGPUAttributor ran prior to the pass, we inferred
1032+
/// the lack of llvm.amdgcn.lds.kernel.id calls.
1033+
void removeNoLdsKernelIdFromReachable(CallGraph &CG, Function *KernelRoot) {
1034+
KernelRoot->removeFnAttr("amdgpu-no-lds-kernel-id");
1035+
1036+
SmallVector<Function *> Tmp({CG[KernelRoot]->getFunction()});
1037+
if (!Tmp.back())
1038+
return;
1039+
1040+
do {
1041+
Function *F = Tmp.pop_back_val();
1042+
1043+
for (auto &N : *CG[F]) {
1044+
if (!N.second)
1045+
continue;
1046+
1047+
Function *Callee = N.second->getFunction();
1048+
if (!Callee) {
1049+
// If we see any indirect calls, assume nothing about potential
1050+
// targets.
1051+
// TODO: This could be refined to possible LDS global users.
1052+
for (auto &N : *CG.getCallsExternalNode()) {
1053+
Function *PotentialCallee = N.second->getFunction();
1054+
Tmp.push_back(PotentialCallee);
1055+
}
1056+
1057+
continue;
1058+
}
1059+
1060+
Callee->removeFnAttr("amdgpu-no-lds-kernel-id");
1061+
Tmp.push_back(Callee);
1062+
}
1063+
} while (!Tmp.empty());
1064+
}
1065+
10301066
DenseMap<Function *, GlobalVariable *> lowerDynamicLDSVariables(
10311067
Module &M, LDSUsesInfoTy &LDSUsesInfo,
10321068
DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS,
@@ -1176,6 +1212,13 @@ class AMDGPULowerModuleLDS {
11761212
M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
11771213
replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
11781214
LookupTable);
1215+
1216+
// Strip amdgpu-no-lds-kernel-id from all functions reachable from the
1217+
// kernel. We may have inferred this wasn't used prior to the pass.
1218+
//
1219+
// TODO: We could filter out subgraphs that do not access LDS globals.
1220+
for (Function *F : KernelsThatAllocateTableLDS)
1221+
removeNoLdsKernelIdFromReachable(CG, F);
11791222
}
11801223

11811224
DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS =

llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ define amdgpu_kernel void @kernel_lds() {
148148

149149
define internal i16 @mutual_recursion_0(i16 %arg) {
150150
; CHECK-LABEL: define internal i16 @mutual_recursion_0(
151-
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] {
151+
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR5:[0-9]+]] {
152152
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
153153
; CHECK-NEXT: [[RECURSIVE_KERNEL_LDS:%.*]] = getelementptr inbounds [3 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
154154
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[RECURSIVE_KERNEL_LDS]], align 4
@@ -168,7 +168,7 @@ define internal i16 @mutual_recursion_0(i16 %arg) {
168168

169169
define internal void @mutual_recursion_1(i16 %arg) {
170170
; CHECK-LABEL: define internal void @mutual_recursion_1(
171-
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] {
171+
; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR5]] {
172172
; CHECK-NEXT: call void @mutual_recursion_0(i16 [[ARG]])
173173
; CHECK-NEXT: ret void
174174
;
@@ -188,13 +188,14 @@ define amdgpu_kernel void @kernel_lds_recursion() {
188188
}
189189

190190
;.
191-
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
192-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
193-
; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
191+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
192+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
193+
; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
194194
; CHECK: attributes #[[ATTR3]] = { "amdgpu-lds-size"="4" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
195195
; CHECK: attributes #[[ATTR4]] = { "amdgpu-lds-size"="2" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
196-
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
197-
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
196+
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
197+
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
198+
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
198199
;.
199200
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 1}
200201
; CHECK: [[META1:![0-9]+]] = !{i32 0}

0 commit comments

Comments
 (0)