Skip to content

Commit ef87702

Browse files
committed
[AMDGPU] Lower LDS in kernels without sanitize_address in amdgpu-sw-lower-lds.
1 parent cb7298f commit ef87702

File tree

5 files changed

+365
-4
lines changed

5 files changed

+365
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -298,8 +298,7 @@ void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() {
298298
for (User *V : GV->users()) {
299299
if (auto *I = dyn_cast<Instruction>(V)) {
300300
Function *F = I->getFunction();
301-
if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress) &&
302-
!F->isDeclaration())
301+
if (!isKernelLDS(F) && !F->isDeclaration())
303302
FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV);
304303
}
305304
}
@@ -1135,6 +1134,17 @@ void AMDGPUSwLowerLDS::initAsanInfo() {
11351134
AsanInfo.Offset = Offset;
11361135
}
11371136

1137+
/// Reports whether any function used as a key in \p LDSAccesses carries the
/// sanitize_address attribute. Entries whose key is null are ignored.
static bool hasFnWithSanitizeAddressAttr(FunctionVariableMap &LDSAccesses) {
  for (auto &Entry : LDSAccesses) {
    Function *Fn = Entry.first;
    // A null key cannot carry the attribute, so only query real functions.
    if (Fn && Fn->hasFnAttribute(Attribute::SanitizeAddress))
      return true;
  }
  // No keyed function is marked sanitize_address.
  return false;
}
1147+
11381148
bool AMDGPUSwLowerLDS::run() {
11391149
bool Changed = false;
11401150

@@ -1145,6 +1155,14 @@ bool AMDGPUSwLowerLDS::run() {
11451155
// Get all the direct and indirect access of LDS for all the kernels.
11461156
LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
11471157

1158+
// Flag to decide whether to lower all the LDS accesses
1159+
// based on sanitize_address attribute.
1160+
bool LowerAllLDS = hasFnWithSanitizeAddressAttr(LDSUsesInfo.direct_access) ||
1161+
hasFnWithSanitizeAddressAttr(LDSUsesInfo.indirect_access);
1162+
1163+
if (!LowerAllLDS)
1164+
return Changed;
1165+
11481166
// Utility to group LDS access into direct, indirect, static and dynamic.
11491167
auto PopulateKernelStaticDynamicLDS = [&](FunctionVariableMap &LDSAccesses,
11501168
bool DirectAccess) {
@@ -1154,8 +1172,6 @@ bool AMDGPUSwLowerLDS::run() {
11541172
continue;
11551173

11561174
assert(isKernelLDS(F));
1157-
if (!F->hasFnAttribute(Attribute::SanitizeAddress))
1158-
continue;
11591175

11601176
// Only inserts if key isn't already in the map.
11611177
FuncLDSAccessInfo.KernelToLDSParametersMap.insert(
@@ -1222,6 +1238,7 @@ bool AMDGPUSwLowerLDS::run() {
12221238
// Get non-kernels with LDS ptr as argument and called by kernels.
12231239
getNonKernelsWithLDSArguments(CG);
12241240

1241+
// Lower LDS accesses in non-kernels.
12251242
if (!FuncLDSAccessInfo.NonKernelToLDSAccessMap.empty() ||
12261243
!FuncLDSAccessInfo.NonKernelsWithLDSArgument.empty()) {
12271244
NonKernelLDSParameters NKLDSParams;
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
2+
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
3+
; RUN: llc < %s -enable-new-pm -stop-after=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
4+
5+
; Test that static LDS accesses in kernels without the sanitize_address attribute are still lowered
6+
; when another kernel in the module has the sanitize_address attribute.
7+
@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
8+
@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
9+
10+
define amdgpu_kernel void @k0() sanitize_address {
11+
; CHECK-LABEL: define amdgpu_kernel void @k0(
12+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
13+
; CHECK-NEXT: WId:
14+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
15+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
16+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
17+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
18+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
19+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
20+
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP20:%.*]]
21+
; CHECK: Malloc:
22+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
23+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4
24+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
25+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
26+
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
27+
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
28+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
29+
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
30+
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
31+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
32+
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
33+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
34+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33
35+
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
36+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 31)
37+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68
38+
; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64
39+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28)
40+
; CHECK-NEXT: br label [[TMP20]]
41+
; CHECK: 20:
42+
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
43+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
44+
; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
45+
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
46+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP22]]
47+
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4
48+
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]]
49+
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32
50+
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP26]]
51+
; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP27]], align 4
52+
; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32
53+
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP28]]
54+
; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP29]], align 2
55+
; CHECK-NEXT: br label [[CONDFREE:%.*]]
56+
; CHECK: CondFree:
57+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
58+
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
59+
; CHECK: Free:
60+
; CHECK-NEXT: [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0)
61+
; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64
62+
; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64
63+
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]])
64+
; CHECK-NEXT: br label [[END]]
65+
; CHECK: End:
66+
; CHECK-NEXT: ret void
67+
;
68+
store i8 7, ptr addrspace(3) @lds_1, align 4
69+
store i32 8, ptr addrspace(3) @lds_2, align 2
70+
ret void
71+
}
72+
73+
define amdgpu_kernel void @k1() {
74+
; CHECK-LABEL: define amdgpu_kernel void @k1(
75+
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
76+
; CHECK-NEXT: WId:
77+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
78+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
79+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
80+
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
81+
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
82+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
83+
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP18:%.*]]
84+
; CHECK: Malloc:
85+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
86+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 2), align 4
87+
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
88+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
89+
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
90+
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
91+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
92+
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
93+
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
94+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
95+
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
96+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
97+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36
98+
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
99+
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 28)
100+
; CHECK-NEXT: br label [[TMP18]]
101+
; CHECK: 18:
102+
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
103+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
104+
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8
105+
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 1, i32 0), align 4
106+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, i32 [[TMP20]]
107+
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[TMP21]] to i32
108+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
109+
; CHECK-NEXT: store i32 9, ptr addrspace(1) [[TMP23]], align 2
110+
; CHECK-NEXT: br label [[CONDFREE:%.*]]
111+
; CHECK: CondFree:
112+
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
113+
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
114+
; CHECK: Free:
115+
; CHECK-NEXT: [[TMP24:%.*]] = call ptr @llvm.returnaddress(i32 0)
116+
; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[TMP24]] to i64
117+
; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
118+
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP26]], i64 [[TMP25]])
119+
; CHECK-NEXT: br label [[END]]
120+
; CHECK: End:
121+
; CHECK-NEXT: ret void
122+
;
123+
store i32 9, ptr addrspace(3) @lds_2, align 2
124+
ret void
125+
}
126+
127+
!llvm.module.flags = !{!0}
128+
!0 = !{i32 4, !"nosanitize_address", i32 1}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
2+
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
3+
; RUN: llc < %s -enable-new-pm -stop-after=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s
4+
5+
; Test that LDS accesses in kernels without the sanitize_address attribute are not lowered
6+
; when no kernel in the module has the sanitize_address attribute.
7+
@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
8+
@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
9+
10+
;.
11+
; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
12+
; CHECK: @lds_2 = internal addrspace(3) global [1 x i32] poison, align 8
13+
;.
14+
define amdgpu_kernel void @k0() {
15+
; CHECK-LABEL: define amdgpu_kernel void @k0() {
16+
; CHECK-NEXT: store i8 7, ptr addrspace(3) @lds_1, align 4
17+
; CHECK-NEXT: store i32 8, ptr addrspace(3) @lds_2, align 2
18+
; CHECK-NEXT: ret void
19+
;
20+
store i8 7, ptr addrspace(3) @lds_1, align 4
21+
store i32 8, ptr addrspace(3) @lds_2, align 2
22+
ret void
23+
}
24+
25+
define amdgpu_kernel void @k1() {
26+
; CHECK-LABEL: define amdgpu_kernel void @k1() {
27+
; CHECK-NEXT: store i32 9, ptr addrspace(3) @lds_2, align 2
28+
; CHECK-NEXT: ret void
29+
;
30+
store i32 9, ptr addrspace(3) @lds_2, align 2
31+
ret void
32+
}
33+
34+
!llvm.module.flags = !{!0}
35+
!0 = !{i32 4, !"nosanitize_address", i32 1}
36+
;.
37+
; CHECK: [[META0:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
38+
;.

0 commit comments

Comments
 (0)