Skip to content

Commit 2d7339a

Browse files
authored
[AMDGPU][LDS] Fix dynamic LDS interaction with "amdgpu-no-lds-kernel-id" (#107092)
Dynamic LDS and table LDS both use the amdgpu_lds_kernel_id intrinsic. Kernels and functions that make an indirect use of it should not have the "amdgpu-no-lds-kernel-id" attribute. For the latter (table LDS), this was already done. For the dynamic LDS case, it was missing. This patch fixes it.
1 parent 660e34f commit 2d7339a

File tree

2 files changed

+12
-12
lines changed

2 files changed

+12
-12
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,20 +1010,21 @@ class AMDGPULowerModuleLDS {
10101010
M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
10111011
replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
10121012
LookupTable);
1013-
1014-
// Strip amdgpu-no-lds-kernel-id from all functions reachable from the
1015-
// kernel. We may have inferred this wasn't used prior to the pass.
1016-
//
1017-
// TODO: We could filter out subgraphs that do not access LDS globals.
1018-
for (Function *F : KernelsThatAllocateTableLDS)
1019-
removeFnAttrFromReachable(CG, F, {"amdgpu-no-lds-kernel-id"});
10201013
}
10211014

10221015
DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS =
10231016
lowerDynamicLDSVariables(M, LDSUsesInfo,
10241017
KernelsThatIndirectlyAllocateDynamicLDS,
10251018
DynamicVariables, OrderedKernels);
10261019

1020+
// Strip amdgpu-no-lds-kernel-id from all functions reachable from the
1021+
// kernel. We may have inferred this wasn't used prior to the pass.
1022+
// TODO: We could filter out subgraphs that do not access LDS globals.
1023+
for (auto *KernelSet : {&KernelsThatIndirectlyAllocateDynamicLDS,
1024+
&KernelsThatAllocateTableLDS})
1025+
for (Function *F : *KernelSet)
1026+
removeFnAttrFromReachable(CG, F, {"amdgpu-no-lds-kernel-id"});
1027+
10271028
// All kernel frames have been allocated. Calculate and record the
10281029
// addresses.
10291030
{

llvm/test/CodeGen/AMDGPU/lower-module-lds-zero-size-arr.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
;.
1313
define void @fn(float %val, i32 %idx) #0 {
1414
; CHECK-LABEL: define void @fn(
15-
; CHECK-SAME: float [[VAL:%.*]], i32 [[IDX:%.*]]) #[[ATTR0:[0-9]+]] {
15+
; CHECK-SAME: float [[VAL:%.*]], i32 [[IDX:%.*]]) {
1616
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
1717
; CHECK-NEXT: [[VAR0:%.*]] = getelementptr inbounds [1 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
1818
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[VAR0]], align 4
@@ -28,7 +28,7 @@ define void @fn(float %val, i32 %idx) #0 {
2828

2929
define amdgpu_kernel void @kernelA(float %val, i32 %idx) #0 {
3030
; CHECK-LABEL: define amdgpu_kernel void @kernelA(
31-
; CHECK-SAME: float [[VAL:%.*]], i32 [[IDX:%.*]]) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META1:![0-9]+]] {
31+
; CHECK-SAME: float [[VAL:%.*]], i32 [[IDX:%.*]]) !llvm.amdgcn.lds.kernel.id [[META1:![0-9]+]] {
3232
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernelA.dynlds) ]
3333
; CHECK-NEXT: tail call void @fn(float [[VAL]], i32 [[IDX]])
3434
; CHECK-NEXT: ret void
@@ -40,9 +40,8 @@ define amdgpu_kernel void @kernelA(float %val, i32 %idx) #0 {
4040
attributes #0 = { "amdgpu-no-lds-kernel-id" }
4141

4242
;.
43-
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-lds-kernel-id" }
44-
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
45-
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
43+
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
44+
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
4645
;.
4746
; CHECK: [[META0]] = !{i32 0, i32 1}
4847
; CHECK: [[META1]] = !{i32 0}

0 commit comments

Comments
 (0)