Skip to content

Commit 1b24dd6

Browse files
skc7 authored and David Salinas committed
[AMDGPU] Skip lowerNonKernelLDSAccesses if function is declaration. (llvm#106975)
This PR skips the lowering of non-kernel LDS accesses (i.e., lowerNonKernelLDSAccesses) when the function is a declaration or when there are no LDS globals to process. Change-Id: I20e4653b0f62fe108e65e55c9e66585ded2a8795
1 parent 25c0a5b commit 1b24dd6

File tree

2 files changed

+100
-2
lines changed

2 files changed

+100
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp

100644 → 100755
Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -272,7 +272,7 @@ void AMDGPUSwLowerLDS::getNonKernelsWithLDSArguments(const CallGraph &CG) {
272272
for (auto &I : *CGN) {
273273
CallGraphNode *CallerCGN = I.second;
274274
Function *CalledFunc = CallerCGN->getFunction();
275-
if (!CalledFunc)
275+
if (!CalledFunc || CalledFunc->isDeclaration())
276276
continue;
277277
if (AMDGPU::isKernelLDS(CalledFunc))
278278
continue;
@@ -300,7 +300,8 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
300300
for (User *V : GV->users()) {
301301
if (auto *I = dyn_cast<Instruction>(V)) {
302302
Function *F = I->getFunction();
303-
if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress))
303+
if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
304+
!F->isDeclaration())
304305
FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
305306
}
306307
}
Lines changed: 97 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,97 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-sw-lower-lds < %s | FileCheck %s
3+
@lds = external addrspace(3) global [5 x i8], align 8
4+
declare void @non_kernel_declaration() sanitize_address
5+
6+
;.
7+
; CHECK: @llvm.amdgcn.sw.lds.k1 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]]
8+
; CHECK: @llvm.amdgcn.sw.lds.k1.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k1.md.type { %llvm.amdgcn.sw.lds.k1.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k1.md.item { i32 32, i32 5, i32 32 } }, no_sanitize_address
9+
; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k1], no_sanitize_address
10+
; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [1 x ptr addrspace(1)]] [[1 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k1.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0)]], no_sanitize_address
11+
;.
12+
define void @non_kernel_function() sanitize_address {
13+
; CHECK-LABEL: define void @non_kernel_function(
14+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
15+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
16+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
17+
; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4
18+
; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP3]], align 8
19+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x [1 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
20+
; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
21+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
22+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
23+
; CHECK-NEXT: [[Y:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
24+
; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
25+
; CHECK-NEXT: store i8 5, ptr [[TMP9]], align 8
26+
; CHECK-NEXT: ret void
27+
;
28+
%Y = addrspacecast ptr addrspace(3) @lds to ptr
29+
store i8 5, ptr addrspacecast( ptr addrspace(3) @lds to ptr), align 8
30+
ret void
31+
}
32+
33+
define amdgpu_kernel void @k1() sanitize_address {
34+
; CHECK-LABEL: define amdgpu_kernel void @k1(
35+
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
36+
; CHECK-NEXT: [[WID:.*]]:
37+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
38+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
39+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
40+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
41+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
42+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
43+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
44+
; CHECK: [[MALLOC]]:
45+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
46+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
47+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
48+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
49+
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
50+
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
51+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
52+
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
53+
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
54+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
55+
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
56+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
57+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 37
58+
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
59+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 27)
60+
; CHECK-NEXT: br label %[[BB18]]
61+
; CHECK: [[BB18]]:
62+
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
63+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
64+
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
65+
; CHECK-NEXT: call void @non_kernel_function()
66+
; CHECK-NEXT: call void @non_kernel_declaration()
67+
; CHECK-NEXT: br label %[[CONDFREE:.*]]
68+
; CHECK: [[CONDFREE]]:
69+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
70+
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
71+
; CHECK: [[FREE]]:
72+
; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
73+
; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
74+
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
75+
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
76+
; CHECK-NEXT: br label %[[END]]
77+
; CHECK: [[END]]:
78+
; CHECK-NEXT: ret void
79+
;
80+
call void @non_kernel_function()
81+
call void @non_kernel_declaration()
82+
ret void
83+
}
84+
85+
!llvm.module.flags = !{!0}
86+
!0 = !{i32 4, !"nosanitize_address", i32 1}
87+
;.
88+
; CHECK: attributes #[[ATTR0]] = { sanitize_address }
89+
; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8" }
90+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
91+
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
92+
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
93+
;.
94+
; CHECK: [[META0]] = !{i32 0, i32 1}
95+
; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
96+
; CHECK: [[META2]] = !{i32 0}
97+
;.

0 commit comments

Comments
 (0)