
Commit 8bc22e1

[ASAN][AMDGPU] Make address sanitizer checks more efficient for the divergent target.
1 parent 92c2529 commit 8bc22e1

4 files changed: +259 −173 lines changed

llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Lines changed: 37 additions & 1 deletion
@@ -174,6 +174,8 @@ const char kAsanAllocasUnpoison[] = "__asan_allocas_unpoison";
 
 const char kAMDGPUAddressSharedName[] = "llvm.amdgcn.is.shared";
 const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private";
+const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64";
+const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable";
 
 // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
 static const size_t kNumberOfAccessSizes = 5;
@@ -699,6 +701,8 @@ struct AddressSanitizer {
                              Instruction *InsertBefore, Value *Addr,
                              uint32_t TypeStoreSize, bool IsWrite,
                              Value *SizeArgument);
+  Instruction *genAMDGPUReportBlock(IRBuilder<> &IRB, Value *Cond,
+                                    bool Recover);
   void instrumentUnusualSizeOrAlignment(Instruction *I,
                                         Instruction *InsertBefore, Value *Addr,
                                         TypeSize TypeStoreSize, bool IsWrite,
@@ -1721,6 +1725,30 @@ Instruction *AddressSanitizer::instrumentAMDGPUAddress(
   return InsertBefore;
 }
 
+Instruction *AddressSanitizer::genAMDGPUReportBlock(IRBuilder<> &IRB,
+                                                    Value *Cond, bool Recover) {
+  Module &M = *IRB.GetInsertBlock()->getModule();
+  Value *ReportCond = Cond;
+  if (!Recover) {
+    auto Ballot = M.getOrInsertFunction(kAMDGPUBallotName, IRB.getInt64Ty(),
+                                        IRB.getInt1Ty());
+    ReportCond = IRB.CreateIsNotNull(IRB.CreateCall(Ballot, {Cond}));
+  }
+
+  auto *Trm =
+      SplitBlockAndInsertIfThen(ReportCond, &*IRB.GetInsertPoint(), false,
+                                MDBuilder(*C).createBranchWeights(1, 100000));
+  Trm->getParent()->setName("asan.report");
+
+  if (Recover)
+    return Trm;
+
+  Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
+  IRB.SetInsertPoint(Trm);
+  return IRB.CreateCall(
+      M.getOrInsertFunction(kAMDGPUUnreachableName, IRB.getVoidTy()), {});
+}
+
 void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
                                          Instruction *InsertBefore, Value *Addr,
                                          MaybeAlign Alignment,
@@ -1772,7 +1800,15 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
   size_t Granularity = 1ULL << Mapping.Scale;
   Instruction *CrashTerm = nullptr;
 
-  if (ClAlwaysSlowPath || (TypeStoreSize < 8 * Granularity)) {
+  bool GenSlowPath = (ClAlwaysSlowPath || (TypeStoreSize < 8 * Granularity));
+
+  if (TargetTriple.isAMDGCN()) {
+    if (GenSlowPath) {
+      auto *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeStoreSize);
+      Cmp = IRB.CreateAnd(Cmp, Cmp2);
+    }
+    CrashTerm = genAMDGPUReportBlock(IRB, Cmp, Recover);
+  } else if (GenSlowPath) {
     // We use branch weights for the slow path check, to indicate that the slow
     // path is rarely taken. This seems to be the case for SPEC benchmarks.
     Instruction *CheckTerm = SplitBlockAndInsertIfThen(
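
For context: on a divergent target such as AMDGCN, every lane of a wavefront evaluates its own fault condition, so branching on that condition directly wraps every access check in divergent control flow. genAMDGPUReportBlock instead votes across the wavefront with llvm.amdgcn.ballot.i64, which makes the branch into the cold asan.report block wave-uniform; inside that block a second, per-lane branch lets only the faulting lanes issue the report call followed by llvm.amdgcn.unreachable. The following is a minimal hand-written LLVM IR sketch of that shape for the aborting (non-Recover) path; it is not taken from the commit, and the kernel name, the pre-computed %addr and %shadow arguments, and the 8-byte report callee are invented for illustration:

; Hand-written sketch (assumed names; not from the commit or its test files).
target triple = "amdgcn-amd-amdhsa"

define amdgpu_kernel void @asan_check_sketch(i64 %addr, i8 %shadow) sanitize_address {
entry:
  ; Per-lane fault condition (fast-path and slow-path checks already combined).
  %fault = icmp ne i8 %shadow, 0
  ; Wave-wide vote: the branch below is uniform across the wavefront.
  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %fault)
  %any.lane.faulted = icmp ne i64 %ballot, 0
  br i1 %any.lane.faulted, label %asan.report, label %cont, !prof !0

asan.report:                                      ; entered by the whole wave, rarely
  ; Second, per-lane branch: only the faulting lanes run the report call.
  br i1 %fault, label %report.lane, label %report.done

report.lane:
  call void @__asan_report_load8(i64 %addr)
  call void @llvm.amdgcn.unreachable()
  br label %report.done

report.done:
  br label %cont

cont:
  ret void
}

declare i64 @llvm.amdgcn.ballot.i64(i1)
declare void @llvm.amdgcn.unreachable()
declare void @__asan_report_load8(i64)

!0 = !{!"branch_weights", i32 1, i32 100000}

Note that instrumentAddress now folds the fast-path and slow-path comparisons into a single i1 with CreateAnd before calling genAMDGPUReportBlock, so each instrumented access costs one ballot and one uniform branch rather than a nest of divergent branches.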

llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_constant_address_space.ll

Lines changed: 42 additions & 32 deletions
@@ -18,17 +18,23 @@ define protected amdgpu_kernel void @constant_load(i64 %i) sanitize_address {
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF2:![0-9]+]]
-; CHECK:       6:
-; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12]]
-; CHECK:       11:
-; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    unreachable
-; CHECK:       12:
+; CHECK-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; CHECK-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP10]])
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT:    br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF2:![0-9]+]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
+; CHECK:       13:
+; CHECK-NEXT:    call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP14]]
+; CHECK:       14:
+; CHECK-NEXT:    br label [[TMP15]]
+; CHECK:       15:
 ; CHECK-NEXT:    [[Q:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -42,19 +48,16 @@ define protected amdgpu_kernel void @constant_load(i64 %i) sanitize_address {
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF2:![0-9]+]]
-; RECOV:       6:
-; RECOV-NEXT:    [[TMP7:%.*]] = and i64 [[TMP0]], 7
-; RECOV-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], 3
-; RECOV-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
-; RECOV-NEXT:    [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
-; RECOV-NEXT:    br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
-; RECOV:       11:
+; RECOV-NEXT:    [[TMP6:%.*]] = and i64 [[TMP0]], 7
+; RECOV-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 3
+; RECOV-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
+; RECOV-NEXT:    [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
+; RECOV-NEXT:    [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
+; RECOV-NEXT:    br i1 [[TMP10]], label [[ASAN_REPORT:%.*]], label [[TMP11:%.*]], !prof [[PROF2:![0-9]+]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load4_noabort(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
-; RECOV-NEXT:    br label [[TMP12]]
-; RECOV:       12:
-; RECOV-NEXT:    br label [[TMP13]]
-; RECOV:       13:
+; RECOV-NEXT:    br label [[TMP11]]
+; RECOV:       11:
 ; RECOV-NEXT:    [[Q:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
 ; RECOV-NEXT:    ret void
 ;
@@ -75,11 +78,18 @@ define protected amdgpu_kernel void @constant_load_8(i64 %i) sanitize_address {
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; CHECK:       6:
-; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP0]]) #[[ATTR3]]
-; CHECK-NEXT:    unreachable
-; CHECK:       7:
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[ASAN_REPORT:%.*]], label [[TMP10:%.*]], !prof [[PROF2]]
+; CHECK:       asan.report:
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP8:%.*]], label [[TMP9:%.*]]
+; CHECK:       8:
+; CHECK-NEXT:    call void @__asan_report_load8(i64 [[TMP0]]) #[[ATTR5]]
+; CHECK-NEXT:    call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:       10:
 ; CHECK-NEXT:    [[Q:%.*]] = load i64, ptr addrspace(4) [[A]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -93,11 +103,11 @@ define protected amdgpu_kernel void @constant_load_8(i64 %i) sanitize_address {
 ; RECOV-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
 ; RECOV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; RECOV-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; RECOV-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; RECOV:       6:
+; RECOV-NEXT:    br i1 [[TMP5]], label [[ASAN_REPORT:%.*]], label [[TMP6:%.*]], !prof [[PROF2]]
+; RECOV:       asan.report:
 ; RECOV-NEXT:    call void @__asan_report_load8_noabort(i64 [[TMP0]]) #[[ATTR3]]
-; RECOV-NEXT:    br label [[TMP7]]
-; RECOV:       7:
+; RECOV-NEXT:    br label [[TMP6]]
+; RECOV:       6:
 ; RECOV-NEXT:    [[Q:%.*]] = load i64, ptr addrspace(4) [[A]], align 8
 ; RECOV-NEXT:    ret void
 ;
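
In recover mode (the RECOV prefixes above, typically enabled with -asan-recover), genAMDGPUReportBlock skips the ballot: the noabort report call returns, so the pass emits a single, possibly divergent branch and the lanes simply rejoin afterwards. A matching hand-written sketch, under the same simplifying assumptions as the one shown after the C++ diff:

; Hand-written sketch of the recover shape (assumed names; not from the commit).
target triple = "amdgcn-amd-amdhsa"

define amdgpu_kernel void @asan_check_recover_sketch(i64 %addr, i8 %shadow) sanitize_address {
entry:
  %fault = icmp ne i8 %shadow, 0
  ; No ballot: the report call returns, so a divergent branch that reconverges is enough.
  br i1 %fault, label %asan.report, label %cont, !prof !0

asan.report:
  call void @__asan_report_load8_noabort(i64 %addr)
  br label %cont

cont:
  ret void
}

declare void @__asan_report_load8_noabort(i64)

!0 = !{!"branch_weights", i32 1, i32 100000}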
