Skip to content

[AMDGPU] Introduce address sanitizer instrumentation for LDS lowered by amdgpu-sw-lower-lds pass #89208

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
340 changes: 296 additions & 44 deletions llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s

; Metadata record type for kernel @k0: a struct of two md.item entries.
%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
; One metadata entry made of three i32 fields.
; NOTE(review): judging by how @k0 fills them, the fields look like
; { offset, size, aligned size } -- confirm against the amdgpu-sw-lower-lds pass.
%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 }

; LDS (addrspace(3)) slot, pinned through !absolute_symbol !0. @k0 stores the
; pointer returned by __asan_malloc_impl here and reloads it before freeing.
@llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol !0
; Zero-length external LDS array, pinned through !absolute_symbol !1. @k0 only
; references it via the "ExplicitUse" operand bundle on llvm.donothing.
@llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol !1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the test name, isn't the case without no_sanitize_address the more interesting one? Shouldn't skipping these globals be covered by a separate test?

; Metadata instance in addrspace(1). Input values are item 0 = { 0, 8, 8 } and
; item 1 = { 8, 0, 0 }; the assertions further down expect the asan pass to
; rewrite them to { 0, 8, 32 } and { 32, 0, 0 }.
@llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 0, i32 0 } }, no_sanitize_address

; Function Attrs: sanitize_address
;.
; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol [[META1:![0-9]+]]
; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 0, i32 0 } }, no_sanitize_address
; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata"
; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0
; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64
; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64
; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }]
;.
; Kernel under test. The input body (below the autogenerated assertions) has
; this shape:
;   WId      - ORs the x/y/z work-item ids and branches to Malloc only when the
;              result is zero.
;   Malloc   - fills in item 1 of the metadata global, calls __asan_malloc_impl
;              and stores the returned pointer into @llvm.amdgcn.sw.lds.k0.
;   21       - barrier, then stores i8 7 through an LDS pointer computed from
;              the offset kept in metadata item 1, field 0.
;   CondFree - barrier, then branches to Free only on the path that went
;              through Malloc (%xyzCond).
;   Free     - reloads the stored pointer and passes it to __asan_free_impl.
; The assertions additionally expect, after -passes=asan:
;   * a shadow-memory check reporting through __asan_report_load4 ahead of the
;     i32 load from the implicit-argument pointer,
;   * a call to __asan_poison_region(<malloc result + 8>, 24) after the malloc
;     result is stored,
;   * a shadow-memory check reporting through __asan_report_store1 ahead of the
;     i8 store, using the address <pointer loaded from the LDS slot> + <LDS
;     pointer converted to an integer>.
define amdgpu_kernel void @k0() #0 {
; CHECK-LABEL: define amdgpu_kernel void @k0(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB39:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15
; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr addrspace(4) [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP11]], 3
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 2147450880
; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP15]], 0
; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP11]], 7
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 3
; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i8
; CHECK-NEXT: [[TMP20:%.*]] = icmp sge i8 [[TMP19]], [[TMP15]]
; CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP16]], [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP21]])
; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP22]], 0
; CHECK-NEXT: br i1 [[TMP23]], label %[[ASAN_REPORT:.*]], label %[[BB26:.*]], !prof [[PROF2:![0-9]+]]
; CHECK: [[ASAN_REPORT]]:
; CHECK-NEXT: br i1 [[TMP21]], label %[[BB24:.*]], label %[[BB25:.*]]
; CHECK: [[BB24]]:
; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP11]]) #[[ATTR7:[0-9]+]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label %[[BB25]]
; CHECK: [[BB25]]:
; CHECK-NEXT: br label %[[BB26]]
; CHECK: [[BB26]]:
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4
; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4
; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 0
; CHECK-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], 1
; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 1
; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[TMP8]], [[TMP30]]
; CHECK-NEXT: [[TMP32:%.*]] = zext i32 [[TMP31]] to i64
; CHECK-NEXT: [[TMP33:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP33]] to i64
; CHECK-NEXT: [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP32]], i64 [[TMP34]])
; CHECK-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP36]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP36]], i64 8
; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint ptr addrspace(1) [[TMP39]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP40]], i64 24)
; CHECK-NEXT: br label %[[BB39]]
; CHECK: [[BB39]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[BB26]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP44]]
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
; CHECK-NEXT: [[TMP46:%.*]] = ptrtoint ptr addrspace(3) [[TMP45]] to i32
; CHECK-NEXT: [[TMP47:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP47]], i32 [[TMP46]]
; CHECK-NEXT: [[TMP49:%.*]] = ptrtoint ptr addrspace(1) [[TMP48]] to i64
; CHECK-NEXT: [[TMP50:%.*]] = lshr i64 [[TMP49]], 3
; CHECK-NEXT: [[TMP51:%.*]] = add i64 [[TMP50]], 2147450880
; CHECK-NEXT: [[TMP52:%.*]] = inttoptr i64 [[TMP51]] to ptr
; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1
; CHECK-NEXT: [[TMP54:%.*]] = icmp ne i8 [[TMP53]], 0
; CHECK-NEXT: [[TMP55:%.*]] = and i64 [[TMP49]], 7
; CHECK-NEXT: [[TMP56:%.*]] = trunc i64 [[TMP55]] to i8
; CHECK-NEXT: [[TMP57:%.*]] = icmp sge i8 [[TMP56]], [[TMP53]]
; CHECK-NEXT: [[TMP58:%.*]] = and i1 [[TMP54]], [[TMP57]]
; CHECK-NEXT: [[TMP59:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP58]])
; CHECK-NEXT: [[TMP60:%.*]] = icmp ne i64 [[TMP59]], 0
; CHECK-NEXT: br i1 [[TMP60]], label %[[ASAN_REPORT1:.*]], label %[[BB59:.*]], !prof [[PROF2]]
; CHECK: [[ASAN_REPORT1]]:
; CHECK-NEXT: br i1 [[TMP58]], label %[[BB57:.*]], label %[[BB58:.*]]
; CHECK: [[BB57]]:
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP49]]) #[[ATTR7]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label %[[BB58]]
; CHECK: [[BB58]]:
; CHECK-NEXT: br label %[[BB59]]
; CHECK: [[BB59]]:
; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP45]], align 4
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP64:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP65:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP66:%.*]] = ptrtoint ptr [[TMP65]] to i64
; CHECK-NEXT: [[TMP67:%.*]] = ptrtoint ptr addrspace(1) [[TMP64]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP67]], i64 [[TMP66]])
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
; Entry: %5 is true only when the x, y and z work-item ids are all zero.
WId:
%0 = call i32 @llvm.amdgcn.workitem.id.x()
%1 = call i32 @llvm.amdgcn.workitem.id.y()
%2 = call i32 @llvm.amdgcn.workitem.id.z()
%3 = or i32 %0, %1
%4 = or i32 %3, %2
%5 = icmp eq i32 %4, 0
br i1 %5, label %Malloc, label %21

; Fills metadata item 1 and allocates the backing buffer.
; %8  = item 0 field 0 + item 0 field 2, stored to item 1 field 0.
; %11 = i32 loaded from element 15 of the implicit-argument area (indexed in
;       units of ptr addrspace(4)), stored to item 1 field 1.
;       NOTE(review): presumably the dynamic LDS size -- confirm.
; %14 = %11 passed through add 0 / udiv 1 / mul 1, stored to item 1 field 2.
; %15 = %8 + %14 is the size handed to __asan_malloc_impl together with the
;       return address.
Malloc:
%6 = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4
%7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4
%8 = add i32 %6, %7
%9 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%10 = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) %9, i64 15
store i32 %8, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
%11 = load i32, ptr addrspace(4) %10, align 4
store i32 %11, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4
%12 = add i32 %11, 0
%13 = udiv i32 %12, 1
%14 = mul i32 %13, 1
store i32 %14, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
%15 = add i32 %8, %14
%16 = zext i32 %15 to i64
%17 = call ptr @llvm.returnaddress(i32 0)
%18 = ptrtoint ptr %17 to i64
%19 = call i64 @__asan_malloc_impl(i64 %16, i64 %18)
%20 = inttoptr i64 %19 to ptr addrspace(1)
store ptr addrspace(1) %20, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
br label %21

; Join point: %xyzCond records whether this path came through Malloc. After the
; barrier, the i8 store below is the LDS access the sanitizer is expected to
; instrument.
21:
%xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ]
call void @llvm.amdgcn.s.barrier()
%22 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
%23 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %22
call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ]
store i8 7, ptr addrspace(3) %23, align 4
br label %CondFree

; Second barrier, then only the path that went through Malloc proceeds to Free.
CondFree:
call void @llvm.amdgcn.s.barrier()
br i1 %xyzCond, label %Free, label %End

; Reloads the pointer stored in Malloc and passes it to __asan_free_impl along
; with the return address.
Free:
%24 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use named values in tests

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test IR input is generated by running amdgpu-sw-lower-lds pass. Unnamed values are seen because of this. Will update them with named value if needed.

%25 = call ptr @llvm.returnaddress(i32 0)
%26 = ptrtoint ptr %25 to i64
%27 = ptrtoint ptr addrspace(1) %24 to i64
call void @__asan_free_impl(i64 %27, i64 %26)
br label %End

End:
ret void
}

; Intrinsics and runtime entry points called by @k0.
declare void @llvm.donothing() #1
declare i32 @llvm.amdgcn.workitem.id.x() #2
declare i32 @llvm.amdgcn.workitem.id.y() #2
declare i32 @llvm.amdgcn.workitem.id.z() #2
declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
declare ptr @llvm.returnaddress(i32 immarg) #1
; Called as (size, return address) in @k0; the i64 result is used as a pointer.
declare i64 @__asan_malloc_impl(i64, i64)
declare void @llvm.amdgcn.s.barrier() #3

; Called as (pointer as i64, return address) in @k0.
declare void @__asan_free_impl(i64, i64)

; #0 is the attribute set of @k0; the assertions after this section expect
; "amdgpu-lds-size" to be rewritten from "8,8" to "32,32".
attributes #0 = { sanitize_address "amdgpu-lds-size"="8,8" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #3 = { convergent nocallback nofree nounwind willreturn }

; !absolute_symbol operands: !0 is attached to @llvm.amdgcn.sw.lds.k0 and !1 to
; @llvm.amdgcn.k0.dynlds. The assertions after this section expect !1 to become
; !{i32 32, i32 33} while !0 stays unchanged.
!0 = !{i32 0, i32 1}
!1 = !{i32 8, i32 9}
;.
; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="32,32" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nounwind }
; CHECK: attributes #[[ATTR7]] = { nomerge }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
; CHECK: [[META1]] = !{i32 32, i32 33}
; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
;.
Loading
Loading