Skip to content

[ASAN][AMDGPU] Make address sanitizer checks more efficient for the divergent target. #72247

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
1 commit merged on Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ const char kAsanAllocasUnpoison[] = "__asan_allocas_unpoison";

// Names of AMDGPU intrinsics emitted by the AMDGPU-specific instrumentation.
const char kAMDGPUAddressSharedName[] = "llvm.amdgcn.is.shared";
const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private";
// Wave-wide ballot: yields an i64 lane mask of the lanes whose i1 operand is
// true (used below to branch the whole wave to the report block together).
const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64";
// Divergence-aware substitute for a plain `unreachable` terminator; used in
// the non-recovering report path where only some lanes abort.
const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable";

// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
Expand Down Expand Up @@ -699,6 +701,8 @@ struct AddressSanitizer {
Instruction *InsertBefore, Value *Addr,
uint32_t TypeStoreSize, bool IsWrite,
Value *SizeArgument);
Instruction *genAMDGPUReportBlock(IRBuilder<> &IRB, Value *Cond,
bool Recover);
void instrumentUnusualSizeOrAlignment(Instruction *I,
Instruction *InsertBefore, Value *Addr,
TypeSize TypeStoreSize, bool IsWrite,
Expand Down Expand Up @@ -1721,6 +1725,30 @@ Instruction *AddressSanitizer::instrumentAMDGPUAddress(
return InsertBefore;
}

// Build the error-reporting control flow for AMDGPU, where lanes of a wave
// may diverge on the poison check. Returns the instruction before which the
// __asan_report_* call should be inserted.
//
// \param IRB     Builder positioned at the faulting access.
// \param Cond    Per-lane i1: true iff this lane's access is poisoned.
// \param Recover True for the *_noabort (recoverable) flavor.
Instruction *AddressSanitizer::genAMDGPUReportBlock(IRBuilder<> &IRB,
Value *Cond, bool Recover) {
Module &M = *IRB.GetInsertBlock()->getModule();
Value *ReportCond = Cond;
if (!Recover) {
// Non-recovering mode aborts, so the whole wave can enter the report
// block together: branch if the ballot over Cond is non-zero, i.e. if
// ANY lane faults. This keeps the branch wave-uniform instead of
// serializing divergent lanes.
auto Ballot = M.getOrInsertFunction(kAMDGPUBallotName, IRB.getInt64Ty(),
IRB.getInt1Ty());
ReportCond = IRB.CreateIsNotNull(IRB.CreateCall(Ballot, {Cond}));
}

// Split off the (rarely taken — 1:100000 weights) report block.
auto *Trm =
SplitBlockAndInsertIfThen(ReportCond, &*IRB.GetInsertPoint(), false,
MDBuilder(*C).createBranchWeights(1, 100000));
Trm->getParent()->setName("asan.report");

if (Recover)
return Trm;

// Non-recovering: inside the report block, re-test the per-lane Cond so
// only the faulting lanes execute the report call, then emit the
// divergence-aware unreachable. A plain `unreachable` would be wrong here
// because non-faulting lanes of the wave also flow through this block.
Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
IRB.SetInsertPoint(Trm);
return IRB.CreateCall(
M.getOrInsertFunction(kAMDGPUUnreachableName, IRB.getVoidTy()), {});
}

void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr,
MaybeAlign Alignment,
Expand Down Expand Up @@ -1772,7 +1800,15 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
size_t Granularity = 1ULL << Mapping.Scale;
Instruction *CrashTerm = nullptr;

if (ClAlwaysSlowPath || (TypeStoreSize < 8 * Granularity)) {
bool GenSlowPath = (ClAlwaysSlowPath || (TypeStoreSize < 8 * Granularity));

if (TargetTriple.isAMDGCN()) {
if (GenSlowPath) {
auto *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeStoreSize);
Cmp = IRB.CreateAnd(Cmp, Cmp2);
}
CrashTerm = genAMDGPUReportBlock(IRB, Cmp, Recover);
} else if (GenSlowPath) {
// We use branch weights for the slow path check, to indicate that the slow
// path is rarely taken. This seems to be the case for SPEC benchmarks.
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,23 @@ define protected amdgpu_kernel void @constant_load(i64 %i) sanitize_address {
; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: 6:
; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP0]], 7
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 3
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
; CHECK-NEXT: [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
; CHECK-NEXT: br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12]]
; CHECK: 11:
; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: 12:
; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP0]], 7
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 3
; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
; CHECK-NEXT: [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP10]])
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP11]], 0
; CHECK-NEXT: br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: asan.report:
; CHECK-NEXT: br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]]
; CHECK: 13:
; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP0]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label [[TMP14]]
; CHECK: 14:
; CHECK-NEXT: br label [[TMP15]]
; CHECK: 15:
; CHECK-NEXT: [[Q:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
; CHECK-NEXT: ret void
;
Expand All @@ -42,19 +48,16 @@ define protected amdgpu_kernel void @constant_load(i64 %i) sanitize_address {
; RECOV-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
; RECOV-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
; RECOV-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
; RECOV-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF2:![0-9]+]]
; RECOV: 6:
; RECOV-NEXT: [[TMP7:%.*]] = and i64 [[TMP0]], 7
; RECOV-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 3
; RECOV-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i8
; RECOV-NEXT: [[TMP10:%.*]] = icmp sge i8 [[TMP9]], [[TMP4]]
; RECOV-NEXT: br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]]
; RECOV: 11:
; RECOV-NEXT: [[TMP6:%.*]] = and i64 [[TMP0]], 7
; RECOV-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 3
; RECOV-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i8
; RECOV-NEXT: [[TMP9:%.*]] = icmp sge i8 [[TMP8]], [[TMP4]]
; RECOV-NEXT: [[TMP10:%.*]] = and i1 [[TMP5]], [[TMP9]]
; RECOV-NEXT: br i1 [[TMP10]], label [[ASAN_REPORT:%.*]], label [[TMP11:%.*]], !prof [[PROF2:![0-9]+]]
; RECOV: asan.report:
; RECOV-NEXT: call void @__asan_report_load4_noabort(i64 [[TMP0]]) #[[ATTR3:[0-9]+]]
; RECOV-NEXT: br label [[TMP12]]
; RECOV: 12:
; RECOV-NEXT: br label [[TMP13]]
; RECOV: 13:
; RECOV-NEXT: br label [[TMP11]]
; RECOV: 11:
; RECOV-NEXT: [[Q:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
; RECOV-NEXT: ret void
;
Expand All @@ -75,11 +78,18 @@ define protected amdgpu_kernel void @constant_load_8(i64 %i) sanitize_address {
; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
; CHECK: 6:
; CHECK-NEXT: call void @__asan_report_load8(i64 [[TMP0]]) #[[ATTR3]]
; CHECK-NEXT: unreachable
; CHECK: 7:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[ASAN_REPORT:%.*]], label [[TMP10:%.*]], !prof [[PROF2]]
; CHECK: asan.report:
; CHECK-NEXT: br i1 [[TMP5]], label [[TMP8:%.*]], label [[TMP9:%.*]]
; CHECK: 8:
; CHECK-NEXT: call void @__asan_report_load8(i64 [[TMP0]]) #[[ATTR5]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label [[TMP9]]
; CHECK: 9:
; CHECK-NEXT: br label [[TMP10]]
; CHECK: 10:
; CHECK-NEXT: [[Q:%.*]] = load i64, ptr addrspace(4) [[A]], align 8
; CHECK-NEXT: ret void
;
Expand All @@ -93,11 +103,11 @@ define protected amdgpu_kernel void @constant_load_8(i64 %i) sanitize_address {
; RECOV-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
; RECOV-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
; RECOV-NEXT: [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
; RECOV-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
; RECOV: 6:
; RECOV-NEXT: br i1 [[TMP5]], label [[ASAN_REPORT:%.*]], label [[TMP6:%.*]], !prof [[PROF2]]
; RECOV: asan.report:
; RECOV-NEXT: call void @__asan_report_load8_noabort(i64 [[TMP0]]) #[[ATTR3]]
; RECOV-NEXT: br label [[TMP7]]
; RECOV: 7:
; RECOV-NEXT: br label [[TMP6]]
; RECOV: 6:
; RECOV-NEXT: [[Q:%.*]] = load i64, ptr addrspace(4) [[A]], align 8
; RECOV-NEXT: ret void
;
Expand Down
Loading