Skip to content

Commit 5280095

Browse files
committed
[AMDGPU] Lower LDS in kernels without sanitize_address in amdgpu-sw-lower-lds.
1 parent 0d68bad commit 5280095

File tree

5 files changed

+363
-4
lines changed

5 files changed

+363
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -298,8 +298,7 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
298298
for (User *V : GV->users()) {
299299
if (auto *I = dyn_cast<Instruction>(V)) {
300300
Function *F = I->getFunction();
301-
if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
302-
!F->isDeclaration())
301+
if (!isKernelLDS(F) && !F->isDeclaration())
303302
FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
304303
}
305304
}
@@ -1135,6 +1134,17 @@ void AMDGPUSwLowerLDS::initAsanInfo() {
11351134
AsanInfo.Offset = Offset;
11361135
}
11371136

1137+
static bool hasFnWithSanitizeAddressAttr(FunctionVariableMap &LDSAccesses) {
1138+
for (auto &K : LDSAccesses) {
1139+
Function *F = K.first;
1140+
if (!F)
1141+
continue;
1142+
if (F->hasFnAttribute(Attribute::SanitizeAddress))
1143+
return true;
1144+
}
1145+
return false;
1146+
}
1147+
11381148
bool AMDGPUSwLowerLDS::run() {
11391149
bool Changed = false;
11401150

@@ -1145,6 +1155,14 @@ bool AMDGPUSwLowerLDS::run() {
11451155
// Get all the direct and indirect access of LDS for all the kernels.
11461156
LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
11471157

1158+
// Flag to decide whether to lower all the LDS accesses
1159+
// based on sanitize_address attribute.
1160+
bool LowerAllLDS = hasFnWithSanitizeAddressAttr(LDSUsesInfo.direct_access) ||
1161+
hasFnWithSanitizeAddressAttr(LDSUsesInfo.indirect_access);
1162+
1163+
if (!LowerAllLDS)
1164+
return Changed;
1165+
11481166
// Utility to group LDS access into direct, indirect, static and dynamic.
11491167
auto PopulateKernelStaticDynamicLDS = [&](FunctionVariableMap &LDSAccesses,
11501168
bool DirectAccess) {
@@ -1154,8 +1172,6 @@ bool AMDGPUSwLowerLDS::run() {
11541172
continue;
11551173

11561174
assert(isKernelLDS(F));
1157-
if (!F->hasFnAttribute(Attribute::SanitizeAddress))
1158-
continue;
11591175

11601176
// Only inserts if key isn't already in the map.
11611177
FuncLDSAccessInfo.KernelToLDSParametersMap.insert(
@@ -1222,6 +1238,7 @@ bool AMDGPUSwLowerLDS::run() {
12221238
// Get non-kernels with LDS ptr as argument and called by kernels.
12231239
getNonKernelsWithLDSArguments(CG);
12241240

1241+
// Lower LDS accesses in non-kernels.
12251242
if (!FuncLDSAccessInfo.NonKernelToLDSAccessMap.empty() ||
12261243
!FuncLDSAccessInfo.NonKernelsWithLDSArgument.empty()) {
12271244
NonKernelLDSParameters NKLDSParams;
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
3+
4+
; Test to check if static LDS accesses in kernels without sanitize_address attribute are lowered if
5+
; other kernels in module have sanitize_address attribute.
6+
@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
7+
@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
8+
9+
define amdgpu_kernel void @k0() sanitize_address {
10+
; CHECK-LABEL: define amdgpu_kernel void @k0(
11+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
12+
; CHECK-NEXT: [[WID:.*]]:
13+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
14+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
15+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
16+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
17+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
18+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
19+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB20:.*]]
20+
; CHECK: [[MALLOC]]:
21+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
22+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
23+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
24+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
25+
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
26+
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
27+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
28+
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
29+
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
30+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
31+
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
32+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
33+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
34+
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
35+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 31)
36+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
37+
; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
38+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28)
39+
; CHECK-NEXT: br label %[[BB20]]
40+
; CHECK: [[BB20]]:
41+
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
42+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
43+
; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
44+
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
45+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP22]]
46+
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
47+
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]]
48+
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32
49+
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP26]]
50+
; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP27]], align 4
51+
; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32
52+
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP28]]
53+
; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP29]], align 2
54+
; CHECK-NEXT: br label %[[CONDFREE:.*]]
55+
; CHECK: [[CONDFREE]]:
56+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
57+
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
58+
; CHECK: [[FREE]]:
59+
; CHECK-NEXT: [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
60+
; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
61+
; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
62+
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
63+
; CHECK-NEXT: br label %[[END]]
64+
; CHECK: [[END]]:
65+
; CHECK-NEXT: ret void
66+
;
67+
store i8 7, ptr addrspace(3) @lds_1, align 4
68+
store i32 8, ptr addrspace(3) @lds_2, align 2
69+
ret void
70+
}
71+
72+
define amdgpu_kernel void @k1() {
73+
; CHECK-LABEL: define amdgpu_kernel void @k1(
74+
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
75+
; CHECK-NEXT: [[WID:.*]]:
76+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
77+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
78+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
79+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
80+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
81+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
82+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
83+
; CHECK: [[MALLOC]]:
84+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
85+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
86+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
87+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
88+
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
89+
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
90+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
91+
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
92+
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
93+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
94+
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
95+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
96+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36
97+
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
98+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 28)
99+
; CHECK-NEXT: br label %[[BB18]]
100+
; CHECK: [[BB18]]:
101+
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
102+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
103+
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
104+
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
105+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, i32 [[TMP20]]
106+
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[TMP21]] to i32
107+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
108+
; CHECK-NEXT: store i32 9, ptr addrspace(1) [[TMP23]], align 2
109+
; CHECK-NEXT: br label %[[CONDFREE:.*]]
110+
; CHECK: [[CONDFREE]]:
111+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
112+
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
113+
; CHECK: [[FREE]]:
114+
; CHECK-NEXT: [[TMP24:%.*]] = call ptr @llvm.returnaddress(i32 0)
115+
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[TMP24]] to i64
116+
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
117+
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP26]], i64 [[TMP25]])
118+
; CHECK-NEXT: br label %[[END]]
119+
; CHECK: [[END]]:
120+
; CHECK-NEXT: ret void
121+
;
122+
store i32 9, ptr addrspace(3) @lds_2, align 2
123+
ret void
124+
}
125+
126+
!llvm.module.flags = !{!0}
127+
!0 = !{i32 4, !"nosanitize_address", i32 1}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
2+
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
3+
4+
; Test to check if LDS accesses in kernels without sanitize_address attribute are not lowered
5+
; if all other kernels don't have sanitize_address attribute.
6+
@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
7+
@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
8+
9+
;.
10+
; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
11+
; CHECK: @lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
12+
;.
13+
define amdgpu_kernel void @k0() {
14+
; CHECK-LABEL: define amdgpu_kernel void @k0() {
15+
; CHECK-NEXT: store i8 7, ptr addrspace(3) @lds_1, align 4
16+
; CHECK-NEXT: store i32 8, ptr addrspace(3) @lds_2, align 2
17+
; CHECK-NEXT: ret void
18+
;
19+
store i8 7, ptr addrspace(3) @lds_1, align 4
20+
store i32 8, ptr addrspace(3) @lds_2, align 2
21+
ret void
22+
}
23+
24+
define amdgpu_kernel void @k1() {
25+
; CHECK-LABEL: define amdgpu_kernel void @k1() {
26+
; CHECK-NEXT: store i32 9, ptr addrspace(3) @lds_2, align 2
27+
; CHECK-NEXT: ret void
28+
;
29+
store i32 9, ptr addrspace(3) @lds_2, align 2
30+
ret void
31+
}
32+
33+
!llvm.module.flags = !{!0}
34+
!0 = !{i32 4, !"nosanitize_address", i32 1}
35+
;.
36+
; CHECK: [[META0:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
37+
;.

0 commit comments

Comments
 (0)