-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Introduce address sanitizer instrumentation for LDS lowered by amdgpu-sw-lower-lds pass #89208
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Introduce address sanitizer instrumentation for LDS lowered by amdgpu-sw-lower-lds pass #89208
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 | ||
; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s | ||
|
||
; Metadata layout for kernel @k0: a struct of two items, each holding three i32 fields. | ||
; NOTE(review): the fields look like {offset, size, aligned size} as emitted by amdgpu-sw-lower-lds - confirm against the pass. | ||
%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item } | ||
%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 } | ||
|
||
; addrspace(3) slot into which @k0 stores the pointer obtained from __asan_malloc_impl. | ||
@llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol !0 | ||
; Zero-sized external addrspace(3) array; @k0 references it only through the "ExplicitUse" operand bundle. | ||
@llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol !1 | ||
; Metadata instance in addrspace(1); @k0 loads fields of item 0 and stores all three fields of item 1 at run time. | ||
@llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 0, i32 0 } }, no_sanitize_address | ||
|
||
; Function Attrs: sanitize_address | ||
;. | ||
; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol [[META0:![0-9]+]] | ||
; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol [[META1:![0-9]+]] | ||
; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 0, i32 0 } }, no_sanitize_address | ||
; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata" | ||
; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0 | ||
; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64 | ||
; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64 | ||
; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }] | ||
;. | ||
define amdgpu_kernel void @k0() #0 { | ||
; CHECK-LABEL: define amdgpu_kernel void @k0( | ||
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { | ||
; CHECK-NEXT: [[WID:.*]]: | ||
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() | ||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() | ||
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() | ||
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] | ||
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] | ||
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 | ||
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB39:.*]] | ||
; CHECK: [[MALLOC]]: | ||
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4 | ||
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4 | ||
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]] | ||
; CHECK-NEXT: [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() | ||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15 | ||
; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 | ||
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr addrspace(4) [[TMP10]] to i64 | ||
; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP11]], 3 | ||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 2147450880 | ||
; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr | ||
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1 | ||
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP15]], 0 | ||
; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP11]], 7 | ||
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 3 | ||
; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i8 | ||
; CHECK-NEXT: [[TMP20:%.*]] = icmp sge i8 [[TMP19]], [[TMP15]] | ||
; CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP16]], [[TMP20]] | ||
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP21]]) | ||
; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP22]], 0 | ||
; CHECK-NEXT: br i1 [[TMP23]], label %[[ASAN_REPORT:.*]], label %[[BB26:.*]], !prof [[PROF2:![0-9]+]] | ||
; CHECK: [[ASAN_REPORT]]: | ||
; CHECK-NEXT: br i1 [[TMP21]], label %[[BB24:.*]], label %[[BB25:.*]] | ||
; CHECK: [[BB24]]: | ||
; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP11]]) #[[ATTR7:[0-9]+]] | ||
; CHECK-NEXT: call void @llvm.amdgcn.unreachable() | ||
; CHECK-NEXT: br label %[[BB25]] | ||
; CHECK: [[BB25]]: | ||
; CHECK-NEXT: br label %[[BB26]] | ||
; CHECK: [[BB26]]: | ||
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 | ||
; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4 | ||
; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 0 | ||
; CHECK-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], 1 | ||
; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 1 | ||
; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4 | ||
; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[TMP8]], [[TMP30]] | ||
; CHECK-NEXT: [[TMP32:%.*]] = zext i32 [[TMP31]] to i64 | ||
; CHECK-NEXT: [[TMP33:%.*]] = call ptr @llvm.returnaddress(i32 0) | ||
; CHECK-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP33]] to i64 | ||
; CHECK-NEXT: [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP32]], i64 [[TMP34]]) | ||
; CHECK-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1) | ||
; CHECK-NEXT: store ptr addrspace(1) [[TMP36]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 | ||
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP36]], i64 8 | ||
; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint ptr addrspace(1) [[TMP39]] to i64 | ||
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP40]], i64 24) | ||
; CHECK-NEXT: br label %[[BB39]] | ||
; CHECK: [[BB39]]: | ||
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[BB26]] ] | ||
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() | ||
; CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 | ||
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP44]] | ||
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] | ||
; CHECK-NEXT: [[TMP46:%.*]] = ptrtoint ptr addrspace(3) [[TMP45]] to i32 | ||
; CHECK-NEXT: [[TMP47:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 | ||
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP47]], i32 [[TMP46]] | ||
; CHECK-NEXT: [[TMP49:%.*]] = ptrtoint ptr addrspace(1) [[TMP48]] to i64 | ||
; CHECK-NEXT: [[TMP50:%.*]] = lshr i64 [[TMP49]], 3 | ||
; CHECK-NEXT: [[TMP51:%.*]] = add i64 [[TMP50]], 2147450880 | ||
; CHECK-NEXT: [[TMP52:%.*]] = inttoptr i64 [[TMP51]] to ptr | ||
; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1 | ||
; CHECK-NEXT: [[TMP54:%.*]] = icmp ne i8 [[TMP53]], 0 | ||
; CHECK-NEXT: [[TMP55:%.*]] = and i64 [[TMP49]], 7 | ||
; CHECK-NEXT: [[TMP56:%.*]] = trunc i64 [[TMP55]] to i8 | ||
; CHECK-NEXT: [[TMP57:%.*]] = icmp sge i8 [[TMP56]], [[TMP53]] | ||
; CHECK-NEXT: [[TMP58:%.*]] = and i1 [[TMP54]], [[TMP57]] | ||
; CHECK-NEXT: [[TMP59:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP58]]) | ||
; CHECK-NEXT: [[TMP60:%.*]] = icmp ne i64 [[TMP59]], 0 | ||
; CHECK-NEXT: br i1 [[TMP60]], label %[[ASAN_REPORT1:.*]], label %[[BB59:.*]], !prof [[PROF2]] | ||
; CHECK: [[ASAN_REPORT1]]: | ||
; CHECK-NEXT: br i1 [[TMP58]], label %[[BB57:.*]], label %[[BB58:.*]] | ||
; CHECK: [[BB57]]: | ||
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP49]]) #[[ATTR7]] | ||
; CHECK-NEXT: call void @llvm.amdgcn.unreachable() | ||
; CHECK-NEXT: br label %[[BB58]] | ||
; CHECK: [[BB58]]: | ||
; CHECK-NEXT: br label %[[BB59]] | ||
; CHECK: [[BB59]]: | ||
; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP45]], align 4 | ||
; CHECK-NEXT: br label %[[CONDFREE:.*]] | ||
; CHECK: [[CONDFREE]]: | ||
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() | ||
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]] | ||
; CHECK: [[FREE]]: | ||
; CHECK-NEXT: [[TMP64:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 | ||
; CHECK-NEXT: [[TMP65:%.*]] = call ptr @llvm.returnaddress(i32 0) | ||
; CHECK-NEXT: [[TMP66:%.*]] = ptrtoint ptr [[TMP65]] to i64 | ||
; CHECK-NEXT: [[TMP67:%.*]] = ptrtoint ptr addrspace(1) [[TMP64]] to i64 | ||
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP67]], i64 [[TMP66]]) | ||
; CHECK-NEXT: br label %[[END]] | ||
; CHECK: [[END]]: | ||
; CHECK-NEXT: ret void | ||
; | ||
WId: | ||
%0 = call i32 @llvm.amdgcn.workitem.id.x() | ||
%1 = call i32 @llvm.amdgcn.workitem.id.y() | ||
%2 = call i32 @llvm.amdgcn.workitem.id.z() | ||
%3 = or i32 %0, %1 | ||
%4 = or i32 %3, %2 | ||
%5 = icmp eq i32 %4, 0 | ||
br i1 %5, label %Malloc, label %21 | ||
|
||
Malloc: | ||
%6 = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4 | ||
%7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4 | ||
%8 = add i32 %6, %7 | ||
%9 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() | ||
%10 = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) %9, i64 15 | ||
store i32 %8, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 | ||
%11 = load i32, ptr addrspace(4) %10, align 4 | ||
store i32 %11, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4 | ||
%12 = add i32 %11, 0 | ||
%13 = udiv i32 %12, 1 | ||
%14 = mul i32 %13, 1 | ||
store i32 %14, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4 | ||
%15 = add i32 %8, %14 | ||
%16 = zext i32 %15 to i64 | ||
%17 = call ptr @llvm.returnaddress(i32 0) | ||
%18 = ptrtoint ptr %17 to i64 | ||
%19 = call i64 @__asan_malloc_impl(i64 %16, i64 %18) | ||
%20 = inttoptr i64 %19 to ptr addrspace(1) | ||
store ptr addrspace(1) %20, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 | ||
br label %21 | ||
|
||
21: | ||
%xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ] | ||
call void @llvm.amdgcn.s.barrier() | ||
%22 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 | ||
%23 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %22 | ||
call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] | ||
store i8 7, ptr addrspace(3) %23, align 4 | ||
br label %CondFree | ||
|
||
CondFree: | ||
call void @llvm.amdgcn.s.barrier() | ||
br i1 %xyzCond, label %Free, label %End | ||
|
||
Free: | ||
%24 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: Use named values in tests. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reply: The test IR input is generated by running the amdgpu-sw-lower-lds pass, which is why unnamed values appear. I will replace them with named values if needed. |
||
%25 = call ptr @llvm.returnaddress(i32 0) | ||
%26 = ptrtoint ptr %25 to i64 | ||
%27 = ptrtoint ptr addrspace(1) %24 to i64 | ||
call void @__asan_free_impl(i64 %27, i64 %26) | ||
br label %End | ||
|
||
End: | ||
ret void | ||
} | ||
|
||
; Intrinsics and ASan runtime entry points called by @k0. | ||
declare void @llvm.donothing() #1 | ||
declare i32 @llvm.amdgcn.workitem.id.x() #2 | ||
declare i32 @llvm.amdgcn.workitem.id.y() #2 | ||
declare i32 @llvm.amdgcn.workitem.id.z() #2 | ||
declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2 | ||
declare ptr @llvm.returnaddress(i32 immarg) #1 | ||
; @k0 calls this with (total size zero-extended to i64, return address as i64) and converts the i64 result to an addrspace(1) pointer. | ||
declare i64 @__asan_malloc_impl(i64, i64) | ||
declare void @llvm.amdgcn.s.barrier() #3 | ||
|
||
; @k0 calls this with (pointer loaded from @llvm.amdgcn.sw.lds.k0 as i64, return address as i64). | ||
declare void @__asan_free_impl(i64, i64) | ||
|
||
; #0 is the attribute group on kernel @k0; #1-#3 are used by the intrinsic declarations. | ||
; The CHECK lines for the output expect "amdgpu-lds-size"="32,32" on @k0 instead of the "8,8" given here. | ||
attributes #0 = { sanitize_address "amdgpu-lds-size"="8,8" } | ||
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } | ||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } | ||
attributes #3 = { convergent nocallback nofree nounwind willreturn } | ||
|
||
; !0 is the !absolute_symbol operand of @llvm.amdgcn.sw.lds.k0; !1 is that of @llvm.amdgcn.k0.dynlds. | ||
; The CHECK lines for the output expect !1 to become !{i32 32, i32 33} while !0 stays unchanged. | ||
!0 = !{i32 0, i32 1} | ||
!1 = !{i32 8, i32 9} | ||
;. | ||
; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="32,32" } | ||
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } | ||
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } | ||
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } | ||
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } | ||
; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } | ||
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nounwind } | ||
; CHECK: attributes #[[ATTR7]] = { nomerge } | ||
;. | ||
; CHECK: [[META0]] = !{i32 0, i32 1} | ||
; CHECK: [[META1]] = !{i32 32, i32 33} | ||
; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} | ||
;. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on the test name, isn't the case without no_sanitize_address the more interesting one? Shouldn't the skipping behavior be covered by a separate test?