Skip to content

Commit df0dc3b

Browse files
authored
[DeviceSanitizer] Support out-of-bounds on private memory (#13935)
UR: oneapi-src/unified-runtime#1676 Instrument "__asan_mem_to_shadow" to convert private address to its shadow memory address Other steps are same with ASan on stack.
1 parent d5eb1e5 commit df0dc3b

File tree

9 files changed

+308
-63
lines changed

9 files changed

+308
-63
lines changed

libdevice/include/asan_libdevice.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ struct LocalArgsInfo {
6666
constexpr std::size_t ASAN_MAX_NUM_REPORTS = 10;
6767

6868
struct LaunchInfo {
69-
// Don't move this field, we use it in AddressSanitizerPass
7069
uintptr_t PrivateShadowOffset = 0;
70+
uintptr_t PrivateShadowOffsetEnd = 0;
7171

7272
uintptr_t LocalShadowOffset = 0;
7373
uintptr_t LocalShadowOffsetEnd = 0;
@@ -82,8 +82,8 @@ constexpr unsigned ASAN_SHADOW_SCALE = 4;
8282
constexpr unsigned ASAN_SHADOW_GRANULARITY = 1ULL << ASAN_SHADOW_SCALE;
8383

8484
// Based on the observation, only the last 24 bits of the address of the private
85-
// variable have changed, we use 31 bits(2G) to be safe.
86-
constexpr std::size_t ASAN_PRIVATE_SIZE = 0x7fffffffULL + 1;
85+
// variable have changed
86+
constexpr std::size_t ASAN_PRIVATE_SIZE = 0xffffffULL + 1;
8787

8888
// These magic values are written to shadow for better error
8989
// reporting.

libdevice/sanitizer_utils.cpp

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ static const __SYCL_CONSTANT__ char __global_shadow_out_of_bound[] =
6565
static const __SYCL_CONSTANT__ char __local_shadow_out_of_bound[] =
6666
"[kernel] Local shadow memory out-of-bound (ptr: %p -> %p, wg: %d, base: "
6767
"%p)\n";
68+
static const __SYCL_CONSTANT__ char __private_shadow_out_of_bound[] =
69+
"[kernel] Private shadow memory out-of-bound (ptr: %p -> %p, wg: %d, base: "
70+
"%p)\n";
6871

6972
static const __SYCL_CONSTANT__ char __asan_print_unsupport_device_type[] =
7073
"[kernel] Unsupport device type: %d\n";
@@ -123,7 +126,7 @@ inline uptr MemToShadow_DG2(uptr addr, uint32_t as) {
123126
}
124127

125128
if (shadow_ptr > __AsanShadowMemoryGlobalEnd) {
126-
if (__asan_report_out_of_shadow_bounds() && __AsanDebug) {
129+
if (__asan_report_out_of_shadow_bounds()) {
127130
__spirv_ocl_printf(__global_shadow_out_of_bound, addr, shadow_ptr);
128131
}
129132
}
@@ -171,7 +174,7 @@ inline uptr MemToShadow_PVC(uptr addr, uint32_t as) {
171174
}
172175

173176
if (shadow_ptr > __AsanShadowMemoryGlobalEnd) {
174-
if (__asan_report_out_of_shadow_bounds() && __AsanDebug) {
177+
if (__asan_report_out_of_shadow_bounds()) {
175178
__spirv_ocl_printf(__global_shadow_out_of_bound, addr, shadow_ptr,
176179
(uptr)__AsanShadowMemoryGlobalStart);
177180
}
@@ -207,13 +210,46 @@ inline uptr MemToShadow_PVC(uptr addr, uint32_t as) {
207210
((addr & (SLM_SIZE - 1)) >> ASAN_SHADOW_SCALE);
208211

209212
if (shadow_ptr > shadow_offset_end) {
210-
if (__asan_report_out_of_shadow_bounds() && __AsanDebug) {
213+
if (__asan_report_out_of_shadow_bounds()) {
211214
__spirv_ocl_printf(__local_shadow_out_of_bound, addr, shadow_ptr,
212215
wg_lid, (uptr)shadow_offset);
213216
}
214217
return 0;
215218
}
216219
return shadow_ptr;
220+
} else if (as == ADDRESS_SPACE_PRIVATE) { // private
221+
// work-group linear id
222+
const auto WG_LID =
223+
__spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y *
224+
__spirv_BuiltInNumWorkgroups.z +
225+
__spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z +
226+
__spirv_BuiltInWorkgroupId.z;
227+
228+
auto launch_info = (__SYCL_GLOBAL__ const LaunchInfo *)__AsanLaunchInfo;
229+
const auto shadow_offset = launch_info->PrivateShadowOffset;
230+
const auto shadow_offset_end = launch_info->PrivateShadowOffsetEnd;
231+
232+
if (shadow_offset == 0) {
233+
return 0;
234+
}
235+
236+
if (__AsanDebug)
237+
__spirv_ocl_printf(__mem_launch_info, launch_info,
238+
launch_info->PrivateShadowOffset, 0,
239+
launch_info->NumLocalArgs, launch_info->LocalArgs);
240+
241+
uptr shadow_ptr = shadow_offset +
242+
((WG_LID * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE) +
243+
((addr & (ASAN_PRIVATE_SIZE - 1)) >> ASAN_SHADOW_SCALE);
244+
245+
if (shadow_ptr > shadow_offset_end) {
246+
if (__asan_report_out_of_shadow_bounds()) {
247+
__spirv_ocl_printf(__private_shadow_out_of_bound, addr, shadow_ptr,
248+
WG_LID, (uptr)shadow_offset);
249+
}
250+
return 0;
251+
}
252+
return shadow_ptr;
217253
}
218254

219255
return 0;
@@ -233,6 +269,8 @@ inline uptr MemToShadow(uptr addr, uint32_t as) {
233269
return shadow_ptr;
234270
}
235271

272+
// FIXME: OCL "O2" optimizer doesn't work well with following code
273+
#if 0
236274
if (__AsanDebug) {
237275
if (shadow_ptr) {
238276
if (as == ADDRESS_SPACE_PRIVATE)
@@ -244,6 +282,7 @@ inline uptr MemToShadow(uptr addr, uint32_t as) {
244282
__spirv_ocl_printf(__asan_print_shadow_value2, addr, as, shadow_ptr);
245283
}
246284
}
285+
#endif
247286

248287
return shadow_ptr;
249288
}
@@ -606,6 +645,14 @@ ASAN_REPORT_ERROR(store, true, 16)
606645
ASAN_REPORT_ERROR_N(load, false)
607646
ASAN_REPORT_ERROR_N(store, true)
608647

648+
///
649+
/// ASAN convert memory address to shadow memory address
650+
///
651+
652+
DEVICE_EXTERN_C_NOINLINE uptr __asan_mem_to_shadow(uptr ptr, uint32_t as) {
653+
return MemToShadow(ptr, as);
654+
}
655+
609656
///
610657
/// ASAN initialize shdadow memory of local memory
611658
///

llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Lines changed: 55 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private";
179179
const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64";
180180
const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable";
181181

182+
const char kAsanMemToShadow[] = "__asan_mem_to_shadow";
183+
182184
// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
183185
static const size_t kNumberOfAccessSizes = 5;
184186

@@ -447,7 +449,7 @@ static cl::opt<AsanDtorKind> ClOverrideDestructorKind(
447449
static cl::opt<bool>
448450
ClSpirOffloadPrivates("asan-spir-privates",
449451
cl::desc("instrument private pointer"), cl::Hidden,
450-
cl::init(false));
452+
cl::init(true));
451453

452454
static cl::opt<bool> ClSpirOffloadGlobals("asan-spir-globals",
453455
cl::desc("instrument global pointer"),
@@ -820,14 +822,15 @@ struct AddressSanitizer {
820822
Value *SizeArgument, uint32_t Exp,
821823
RuntimeCallInserter &RTCI);
822824
void instrumentMemIntrinsic(MemIntrinsic *MI, RuntimeCallInserter &RTCI);
823-
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
825+
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB,
826+
uint32_t AddressSpace = kSpirOffloadPrivateAS);
824827
bool suppressInstrumentationSiteForDebug(int &Instrumented);
825828
bool instrumentFunction(Function &F, const TargetLibraryInfo *TLI);
826829
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
827830
bool maybeInsertDynamicShadowAtFunctionEntry(Function &F);
828831
void markEscapedLocalAllocas(Function &F);
829832
void instrumentSyclStaticLocalMemory(CallInst *CI);
830-
void instrumentSyclDynamicLocalMemory(Function &F);
833+
bool instrumentSyclDynamicLocalMemory(Function &F);
831834

832835
GlobalVariable *GetOrCreateGlobalString(Module &M, StringRef Name,
833836
StringRef Value,
@@ -899,6 +902,8 @@ struct AddressSanitizer {
899902
FunctionCallee AMDGPUAddressPrivate;
900903
int InstrumentationWithCallsThreshold;
901904
uint32_t MaxInlinePoisoningSize;
905+
906+
FunctionCallee AsanMemToShadow;
902907
};
903908

904909
class ModuleAddressSanitizer {
@@ -1067,7 +1072,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
10671072
DIB(*F.getParent(), /*AllowUnresolved*/ false), C(ASan.C),
10681073
IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)),
10691074
Mapping(ASan.Mapping),
1070-
PoisonStack(ClStack &&
1075+
PoisonStack((ClStack || ClSpirOffloadPrivates) &&
10711076
!Triple(F.getParent()->getTargetTriple()).isAMDGPU()) {}
10721077

10731078
bool runOnFunction() {
@@ -1350,7 +1355,7 @@ static void ExtendSpirKernelArgs(Module &M, FunctionAnalysisManager &FAM) {
13501355
}
13511356

13521357
// Fixup all users
1353-
for (auto [F, NewF] : SpirFuncs) {
1358+
for (auto &[F, NewF] : SpirFuncs) {
13541359
SmallVector<User *, 16> Users(F->users());
13551360
for (User *U : Users) {
13561361
if (auto *CI = dyn_cast<CallInst>(U)) {
@@ -1544,13 +1549,13 @@ void AddressSanitizer::AppendDebugInfoToArgs(Instruction *InsertBefore,
15441549
Args.push_back(ConstantExpr::getPointerCast(FuncNameGV, ConstASPtrTy));
15451550
}
15461551

1547-
Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
1552+
Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB,
1553+
uint32_t AddressSpace) {
15481554
if (TargetTriple.isSPIR()) {
1549-
// ((Shadow & 0xffffffff) >> 3) + __AsanShadowMemoryPrivateStart;
1550-
Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, 0xffffffff));
1551-
Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
1552-
Value *ShadowBase = IRB.CreateLoad(IntptrTy, AsanShadowDevicePrivate);
1553-
return IRB.CreateAdd(Shadow, ShadowBase);
1555+
return IRB.CreateCall(
1556+
AsanMemToShadow,
1557+
{Shadow, ConstantInt::get(IRB.getInt32Ty(), AddressSpace)},
1558+
"shadow_ptr");
15541559
}
15551560
// Shadow >> scale
15561561
Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
@@ -1619,7 +1624,7 @@ void AddressSanitizer::instrumentSyclStaticLocalMemory(CallInst *CI) {
16191624
}
16201625

16211626
// Instument dynamic local memory
1622-
void AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) {
1627+
bool AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) {
16231628
InstrumentationIRBuilder IRB(F.getEntryBlock().getFirstNonPHI());
16241629

16251630
// Save "__asan_launch" into local memory "__AsanLaunchInfo"
@@ -1631,13 +1636,12 @@ void AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) {
16311636
SmallVector<Argument *> LocalArgs;
16321637
for (auto &Arg : F.args()) {
16331638
Type *PtrTy = dyn_cast<PointerType>(Arg.getType()->getScalarType());
1634-
// Local address space
1635-
if (PtrTy && PtrTy->getPointerAddressSpace() == 3)
1639+
if (PtrTy && PtrTy->getPointerAddressSpace() == kSpirOffloadLocalAS)
16361640
LocalArgs.push_back(&Arg);
16371641
}
16381642

16391643
if (LocalArgs.empty())
1640-
return;
1644+
return false;
16411645

16421646
AllocaInst *ArgsArray = IRB.CreateAlloca(
16431647
IntptrTy, ConstantInt::get(Int32Ty, LocalArgs.size()), "local_args");
@@ -1649,6 +1653,7 @@ void AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) {
16491653
IRB.CreateCall(AsanSetShadowDynamicLocalFunc,
16501654
{IRB.CreatePointerCast(ArgsArray, IntptrTy),
16511655
ConstantInt::get(Int32Ty, LocalArgs.size())});
1656+
return true;
16521657
}
16531658

16541659
// Instrument memset/memmove/memcpy
@@ -3232,14 +3237,6 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T
32323237
ArrayType::get(IRB.getInt8Ty(), 0));
32333238

32343239
if (TargetTriple.isSPIR()) {
3235-
AsanShadowDevicePrivate =
3236-
M.getOrInsertGlobal("__AsanShadowMemoryPrivateStart", IntptrTy, [&] {
3237-
return new GlobalVariable(M, IntptrTy, true,
3238-
GlobalVariable::ExternalLinkage, nullptr,
3239-
"__AsanShadowMemoryPrivateStart", nullptr,
3240-
GlobalVariable::NotThreadLocal, 1);
3241-
});
3242-
32433240
// __asan_set_shadow_static_local(
32443241
// uptr ptr,
32453242
// size_t size,
@@ -3263,6 +3260,9 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T
32633260
GlobalVariable::ExternalLinkage, nullptr, "__AsanLaunchInfo",
32643261
nullptr, GlobalVariable::NotThreadLocal, kSpirOffloadLocalAS);
32653262
});
3263+
3264+
AsanMemToShadow = M.getOrInsertFunction(kAsanMemToShadow, IntptrTy,
3265+
IntptrTy, Type::getInt32Ty(*C));
32663266
}
32673267

32683268
AMDGPUAddressShared =
@@ -3391,10 +3391,6 @@ bool AddressSanitizer::instrumentFunction(Function &F,
33913391
// can be passed to that intrinsic.
33923392
markEscapedLocalAllocas(F);
33933393

3394-
if (F.getCallingConv() == CallingConv::SPIR_KERNEL) {
3395-
instrumentSyclDynamicLocalMemory(F);
3396-
}
3397-
33983394
// We want to instrument every address only once per basic block (unless there
33993395
// are calls between uses).
34003396
SmallPtrSet<Value *, 16> TempsToInstrument;
@@ -3514,6 +3510,11 @@ bool AddressSanitizer::instrumentFunction(Function &F,
35143510
if (ChangedStack || !NoReturnCalls.empty())
35153511
FunctionModified = true;
35163512

3513+
// We need to instrument dynamic local arguments after stack poisoner
3514+
if (F.getCallingConv() == CallingConv::SPIR_KERNEL) {
3515+
FunctionModified |= instrumentSyclDynamicLocalMemory(F);
3516+
}
3517+
35173518
LLVM_DEBUG(dbgs() << "ASAN done instrumenting: " << FunctionModified << " "
35183519
<< F << "\n");
35193520

@@ -3999,32 +4000,39 @@ void FunctionStackPoisoner::processStaticAllocas() {
39994000
AI->replaceAllUsesWith(NewAllocaPtr);
40004001
}
40014002

4003+
auto TargetTriple = Triple(F.getParent()->getTargetTriple());
4004+
40024005
// The left-most redzone has enough space for at least 4 pointers.
4003-
// Write the Magic value to redzone[0].
40044006
Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy);
4005-
IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic),
4006-
BasePlus0);
4007-
// Write the frame description constant to redzone[1].
4008-
Value *BasePlus1 = IRB.CreateIntToPtr(
4009-
IRB.CreateAdd(LocalStackBase,
4010-
ConstantInt::get(IntptrTy, ASan.LongSize / 8)),
4011-
IntptrPtrTy);
4012-
GlobalVariable *StackDescriptionGlobal =
4013-
createPrivateGlobalForString(*F.getParent(), DescriptionString,
4014-
/*AllowMerging*/ true, kAsanGenPrefix);
4015-
Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
4016-
IRB.CreateStore(Description, BasePlus1);
4017-
// Write the PC to redzone[2].
4018-
Value *BasePlus2 = IRB.CreateIntToPtr(
4019-
IRB.CreateAdd(LocalStackBase,
4020-
ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)),
4021-
IntptrPtrTy);
4022-
IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
4007+
// SPIRV doesn't use the following metadata
4008+
if (!TargetTriple.isSPIR()) {
4009+
// Write the Magic value to redzone[0].
4010+
IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic),
4011+
BasePlus0);
4012+
// Write the frame description constant to redzone[1].
4013+
Value *BasePlus1 = IRB.CreateIntToPtr(
4014+
IRB.CreateAdd(LocalStackBase,
4015+
ConstantInt::get(IntptrTy, ASan.LongSize / 8)),
4016+
IntptrPtrTy);
4017+
GlobalVariable *StackDescriptionGlobal =
4018+
createPrivateGlobalForString(*F.getParent(), DescriptionString,
4019+
/*AllowMerging*/ true, kAsanGenPrefix);
4020+
Value *Description =
4021+
IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
4022+
IRB.CreateStore(Description, BasePlus1);
4023+
// Write the PC to redzone[2].
4024+
Value *BasePlus2 = IRB.CreateIntToPtr(
4025+
IRB.CreateAdd(LocalStackBase,
4026+
ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)),
4027+
IntptrPtrTy);
4028+
IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
4029+
}
40234030

40244031
const auto &ShadowAfterScope = GetShadowBytesAfterScope(SVD, L);
40254032

40264033
// Poison the stack red zones at the entry.
4027-
Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB);
4034+
Value *ShadowBase =
4035+
ASan.memToShadow(LocalStackBase, IRB, kSpirOffloadPrivateAS);
40284036
// As mask we must use most poisoned case: red zones and after scope.
40294037
// As bytes we can use either the same or just red zones only.
40304038
copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase);
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -asan-spir-privates=1 -asan-use-after-return=never -S | FileCheck %s
2+
3+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
4+
target triple = "spir64-unknown-unknown"
5+
6+
%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" }
7+
%"class.sycl::_V1::detail::array" = type { [1 x i64] }
8+
%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" }
9+
10+
@__const._ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv.p = private unnamed_addr addrspace(1) constant [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4
11+
12+
define spir_func i32 @_Z3fooPii(ptr addrspace(4) %p) {
13+
entry:
14+
%arrayidx = getelementptr inbounds i32, ptr addrspace(4) %p, i64 0
15+
%0 = load i32, ptr addrspace(4) %arrayidx, align 4
16+
ret i32 %0
17+
}
18+
19+
define spir_kernel void @kernel() #0 {
20+
; CHECK-LABEL: define spir_kernel void @kernel
21+
entry:
22+
%p.i = alloca [4 x i32], align 4
23+
; CHECK: %shadow_ptr = call i64 @__asan_mem_to_shadow(i64 %0, i32 0)
24+
call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %p.i)
25+
call void @llvm.memcpy.p0.p1.i64(ptr align 4 %p.i, ptr addrspace(1) align 4 @__const._ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv.p, i64 16, i1 false)
26+
%arraydecay.i = getelementptr inbounds [4 x i32], ptr %p.i, i64 0, i64 0
27+
%0 = addrspacecast ptr %arraydecay.i to ptr addrspace(4)
28+
%call.i = call spir_func i32 @_Z3fooPii(ptr addrspace(4) %0)
29+
ret void
30+
}
31+
32+
attributes #0 = { mustprogress norecurse nounwind sanitize_address uwtable }

0 commit comments

Comments
 (0)