Skip to content

Commit 8ddea15

Browse files
authored
[DeviceSanitizer] Refactor the code to manage shadow memory (#2127)
1 parent 51b68f8 commit 8ddea15

13 files changed

+730
-494
lines changed

source/loader/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,8 @@ if(UR_ENABLE_SANITIZER)
144144
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_quarantine.hpp
145145
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_report.cpp
146146
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_report.hpp
147-
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_shadow_setup.cpp
148-
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_shadow_setup.hpp
147+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_shadow.cpp
148+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_shadow.hpp
149149
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_statistics.cpp
150150
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_statistics.hpp
151151
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_validator.cpp

source/loader/layers/sanitizer/asan_interceptor.cpp

Lines changed: 114 additions & 239 deletions
Large diffs are not rendered by default.

source/loader/layers/sanitizer/asan_interceptor.hpp

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "asan_buffer.hpp"
1717
#include "asan_libdevice.hpp"
1818
#include "asan_options.hpp"
19+
#include "asan_shadow.hpp"
1920
#include "asan_statistics.hpp"
2021
#include "common.hpp"
2122
#include "ur_sanitizer_layer.hpp"
@@ -41,8 +42,7 @@ struct DeviceInfo {
4142

4243
DeviceType Type = DeviceType::UNKNOWN;
4344
size_t Alignment = 0;
44-
uptr ShadowOffset = 0;
45-
uptr ShadowOffsetEnd = 0;
45+
std::shared_ptr<ShadowMemory> Shadow;
4646

4747
// Device features
4848
bool IsSupportSharedSystemUSM = false;
@@ -106,6 +106,27 @@ struct KernelInfo {
106106
}
107107
};
108108

109+
struct ProgramInfo {
110+
ur_program_handle_t Handle;
111+
std::atomic<int32_t> RefCount = 1;
112+
113+
// lock this mutex if following fields are accessed
114+
ur_shared_mutex Mutex;
115+
std::unordered_set<std::shared_ptr<AllocInfo>> AllocInfoForGlobals;
116+
117+
explicit ProgramInfo(ur_program_handle_t Program) : Handle(Program) {
118+
[[maybe_unused]] auto Result =
119+
getContext()->urDdiTable.Program.pfnRetain(Handle);
120+
assert(Result == UR_RESULT_SUCCESS);
121+
}
122+
123+
~ProgramInfo() {
124+
[[maybe_unused]] auto Result =
125+
getContext()->urDdiTable.Program.pfnRelease(Handle);
126+
assert(Result == UR_RESULT_SUCCESS);
127+
}
128+
};
129+
109130
struct ContextInfo {
110131
ur_context_handle_t Handle;
111132
std::atomic<int32_t> RefCount = 1;
@@ -178,8 +199,10 @@ class SanitizerInterceptor {
178199
AllocType Type, void **ResultPtr);
179200
ur_result_t releaseMemory(ur_context_handle_t Context, void *Ptr);
180201

181-
ur_result_t registerDeviceGlobals(ur_context_handle_t Context,
182-
ur_program_handle_t Program);
202+
ur_result_t registerProgram(ur_context_handle_t Context,
203+
ur_program_handle_t Program);
204+
205+
ur_result_t unregisterProgram(ur_program_handle_t Program);
183206

184207
ur_result_t preLaunchKernel(ur_kernel_handle_t Kernel,
185208
ur_queue_handle_t Queue,
@@ -197,6 +220,9 @@ class SanitizerInterceptor {
197220
std::shared_ptr<DeviceInfo> &CI);
198221
ur_result_t eraseDevice(ur_device_handle_t Device);
199222

223+
ur_result_t insertProgram(ur_program_handle_t Program);
224+
ur_result_t eraseProgram(ur_program_handle_t Program);
225+
200226
ur_result_t insertKernel(ur_kernel_handle_t Kernel);
201227
ur_result_t eraseKernel(ur_kernel_handle_t Kernel);
202228

@@ -231,6 +257,12 @@ class SanitizerInterceptor {
231257
return m_DeviceMap[Device];
232258
}
233259

260+
std::shared_ptr<ProgramInfo> getProgramInfo(ur_program_handle_t Program) {
261+
std::shared_lock<ur_shared_mutex> Guard(m_ProgramMapMutex);
262+
assert(m_ProgramMap.find(Program) != m_ProgramMap.end());
263+
return m_ProgramMap[Program];
264+
}
265+
234266
std::shared_ptr<KernelInfo> getKernelInfo(ur_kernel_handle_t Kernel) {
235267
std::shared_lock<ur_shared_mutex> Guard(m_KernelMapMutex);
236268
assert(m_KernelMap.find(Kernel) != m_KernelMap.end());
@@ -243,8 +275,8 @@ class SanitizerInterceptor {
243275
ur_result_t updateShadowMemory(std::shared_ptr<ContextInfo> &ContextInfo,
244276
std::shared_ptr<DeviceInfo> &DeviceInfo,
245277
ur_queue_handle_t Queue);
246-
ur_result_t enqueueAllocInfo(std::shared_ptr<ContextInfo> &ContextInfo,
247-
std::shared_ptr<DeviceInfo> &DeviceInfo,
278+
279+
ur_result_t enqueueAllocInfo(std::shared_ptr<DeviceInfo> &DeviceInfo,
248280
ur_queue_handle_t Queue,
249281
std::shared_ptr<AllocInfo> &AI);
250282

@@ -266,6 +298,10 @@ class SanitizerInterceptor {
266298
m_DeviceMap;
267299
ur_shared_mutex m_DeviceMapMutex;
268300

301+
std::unordered_map<ur_program_handle_t, std::shared_ptr<ProgramInfo>>
302+
m_ProgramMap;
303+
ur_shared_mutex m_ProgramMapMutex;
304+
269305
std::unordered_map<ur_kernel_handle_t, std::shared_ptr<KernelInfo>>
270306
m_KernelMap;
271307
ur_shared_mutex m_KernelMapMutex;

source/loader/layers/sanitizer/asan_options.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ struct AsanOptions {
2020
bool Debug = false;
2121
uint64_t MinRZSize = 16;
2222
uint64_t MaxRZSize = 2048;
23-
uint32_t MaxQuarantineSizeMB = 0;
23+
uint32_t MaxQuarantineSizeMB = 8;
2424
bool DetectLocals = true;
2525
bool DetectPrivates = true;
2626
bool PrintStats = false;
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
/*
2+
*
3+
* Copyright (C) 2024 Intel Corporation
4+
*
5+
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
6+
* See LICENSE.TXT
7+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
*
9+
* @file asan_shadow.cpp
10+
*
11+
*/
12+
13+
#include "asan_shadow.hpp"
14+
#include "asan_interceptor.hpp"
15+
#include "asan_libdevice.hpp"
16+
#include "ur_sanitizer_layer.hpp"
17+
#include "ur_sanitizer_utils.hpp"
18+
19+
namespace ur_sanitizer_layer {
20+
21+
/// Return the process-wide ShadowMemory instance for the given device type,
/// creating it on first use.
///
/// NOTE(review): the per-type instances are function-local statics, so the
/// Context/Device passed on the FIRST call for each type win; later calls
/// with a different context/device get the same object — confirm this is
/// intended (it matches the single-shadow design noted in ShadowMemoryGPU::Setup).
///
/// @param Context context the shadow memory is created for (first call only)
/// @param Device  device the shadow memory is created for (first call only)
/// @param Type    device type selecting the shadow layout
/// @return shared ShadowMemory, or nullptr for unsupported device types
std::shared_ptr<ShadowMemory> GetShadowMemory(ur_context_handle_t Context,
                                              ur_device_handle_t Device,
                                              DeviceType Type) {
    switch (Type) {
    case DeviceType::CPU: {
        static std::shared_ptr<ShadowMemory> ShadowCPU =
            std::make_shared<ShadowMemoryCPU>(Context, Device);
        return ShadowCPU;
    }
    case DeviceType::GPU_PVC: {
        static std::shared_ptr<ShadowMemory> ShadowPVC =
            std::make_shared<ShadowMemoryPVC>(Context, Device);
        return ShadowPVC;
    }
    case DeviceType::GPU_DG2: {
        static std::shared_ptr<ShadowMemory> ShadowDG2 =
            std::make_shared<ShadowMemoryDG2>(Context, Device);
        return ShadowDG2;
    }
    default:
        // Fixed typo in the original message ("Unsupport device type").
        getContext()->logger.error("Unsupported device type");
        return nullptr;
    }
}
41+
42+
/// Reserve the CPU shadow region (one mmap'd range shared by the whole
/// process) and poison the null-pointer redzone.
///
/// The body runs exactly once: the result is cached in a function-local
/// static, so repeated calls return the first outcome.
/// NOTE(review): because the static is per-function (not per-object), a
/// second ShadowMemoryCPU instance would get the cached result without its
/// own ShadowBegin being set — safe today since GetShadowMemory creates a
/// single instance per type; confirm if that ever changes.
///
/// @return UR_RESULT_SUCCESS, or the first failure encountered
ur_result_t ShadowMemoryCPU::Setup() {
    static ur_result_t Result = [this]() {
        size_t ShadowSize = GetShadowSize();
        ShadowBegin = MmapNoReserve(0, ShadowSize);
        if (ShadowBegin == 0) {
            return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
        }
        // Keep the (huge) shadow range out of core dumps.
        DontCoredumpRange(ShadowBegin, ShadowSize);
        ShadowEnd = ShadowBegin + ShadowSize;

        // Set shadow memory for null pointer.
        // (Cleanup: the original returned URes from both branches of an
        // if/else — collapsed to a single return.)
        auto URes = EnqueuePoisonShadow({}, 0, 1, kNullPointerRedzoneMagic);
        if (URes != UR_RESULT_SUCCESS) {
            getContext()->logger.error("EnqueuePoisonShadow(NullPointerRZ): {}",
                                       URes);
        }
        return URes;
    }();
    return Result;
}
63+
64+
/// Unmap the CPU shadow region. Runs the unmap at most once (cached in a
/// function-local static); a no-op if Setup() never reserved anything.
/// (The spelling "Destory" is kept — it is the interface callers use.)
ur_result_t ShadowMemoryCPU::Destory() {
    // Nothing was ever mapped — nothing to tear down.
    if (ShadowBegin == 0) {
        return UR_RESULT_SUCCESS;
    }
    static ur_result_t Result = [this]() {
        return Munmap(ShadowBegin, GetShadowSize()) ? UR_RESULT_SUCCESS
                                                    : UR_RESULT_ERROR_UNKNOWN;
    }();
    return Result;
}
76+
77+
/// Map an application address to its shadow address using the standard
/// ASan direct mapping: shadow = base + (addr >> scale).
uptr ShadowMemoryCPU::MemToShadow(uptr Ptr) {
    const uptr Scaled = Ptr >> ASAN_SHADOW_SCALE;
    return ShadowBegin + Scaled;
}
80+
81+
/// Poison the shadow of [Ptr, Ptr+Size) with `Value`. On CPU the shadow is
/// plain host memory, so this is a direct memset — the queue parameter is
/// unused.
ur_result_t ShadowMemoryCPU::EnqueuePoisonShadow(ur_queue_handle_t, uptr Ptr,
                                                 uptr Size, u8 Value) {
    // Empty range: nothing to do.
    if (Size == 0) {
        return UR_RESULT_SUCCESS;
    }

    // Locals renamed (vs. the members ShadowBegin/ShadowEnd they used to
    // shadow): this is the shadow span of this particular range only.
    const uptr Begin = MemToShadow(Ptr);
    const uptr End = MemToShadow(Ptr + Size - 1);
    assert(Begin <= End);
    const uptr Count = End - Begin + 1;

    getContext()->logger.debug(
        "EnqueuePoisonShadow(addr={}, count={}, value={})", (void *)Begin,
        Count, (void *)(size_t)Value);
    memset((void *)Begin, Value, Count);

    return UR_RESULT_SUCCESS;
}
97+
98+
/// Reserve the GPU shadow virtual-address range and poison the null-pointer
/// redzone. Runs once; the result is cached in a function-local static.
///
/// @return UR_RESULT_SUCCESS, or the first failure encountered
ur_result_t ShadowMemoryGPU::Setup() {
    // Currently, Level-Zero doesn't create independent VAs for each contexts, if we reserve
    // shadow memory for each contexts, this will cause out-of-resource error when user uses
    // multiple contexts. Therefore, we just create one shadow memory here.
    static ur_result_t Result = [this]() {
        size_t ShadowSize = GetShadowSize();
        // TODO: Protect Bad Zone
        auto Result = getContext()->urDdiTable.VirtualMem.pfnReserve(
            Context, nullptr, ShadowSize, (void **)&ShadowBegin);
        // BUGFIX: bail out if the reservation failed. The original code fell
        // through, constructed a queue, and poisoned shadow with ShadowBegin
        // still unset (0).
        if (Result != UR_RESULT_SUCCESS) {
            getContext()->logger.error("urVirtualMemReserve(): {}", Result);
            return Result;
        }
        ShadowEnd = ShadowBegin + ShadowSize;
        // Retain the context which reserves shadow memory
        getContext()->urDdiTable.Context.pfnRetain(Context);

        // Set shadow memory for null pointer
        ManagedQueue Queue(Context, Device);
        Result = EnqueuePoisonShadow(Queue, 0, 1, kNullPointerRedzoneMagic);
        if (Result != UR_RESULT_SUCCESS) {
            getContext()->logger.error("EnqueuePoisonShadow(NullPointerRZ): {}",
                                       Result);
        }
        return Result;
    }();
    return Result;
}
126+
127+
/// Release the GPU shadow virtual-address reservation and drop the context
/// reference taken in Setup(). Runs the teardown at most once; a no-op if
/// Setup() never reserved anything. ("Destory" spelling kept — it is the
/// interface callers use.)
ur_result_t ShadowMemoryGPU::Destory() {
    // Setup never succeeded in reserving the range — nothing to free.
    if (ShadowBegin == 0) {
        return UR_RESULT_SUCCESS;
    }
    static ur_result_t Result = [this]() {
        auto URes = getContext()->urDdiTable.VirtualMem.pfnFree(
            Context, (const void *)ShadowBegin, GetShadowSize());
        // Balance the pfnRetain done when the shadow was reserved.
        getContext()->urDdiTable.Context.pfnRelease(Context);
        return URes;
    }();
    return Result;
}
139+
140+
// Poison the shadow of [Ptr, Ptr+Size) with `Value`. The GPU shadow is a
// large reserved VA range; physical pages are created and mapped lazily,
// one granularity-sized page at a time, the first time a shadow page is
// touched. Each mapped page also records which allocations' shadow lives on
// it, so ReleaseShadow() can unmap the page when the last one is freed.
//
// @param Queue  queue used for the blocking USM memset operations
// @param Ptr    application address whose shadow is to be poisoned
// @param Size   number of application bytes (NOT shadow bytes)
// @param Value  shadow magic byte to store
ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue,
                                                 uptr Ptr, uptr Size,
                                                 u8 Value) {
    // Empty range: nothing to poison.
    if (Size == 0) {
        return UR_RESULT_SUCCESS;
    }

    // NOTE: these locals shadow the ShadowBegin/ShadowEnd members; here they
    // denote the shadow span of this particular range only.
    uptr ShadowBegin = MemToShadow(Ptr);
    uptr ShadowEnd = MemToShadow(Ptr + Size - 1);
    assert(ShadowBegin <= ShadowEnd);
    {
        static const size_t PageSize =
            GetVirtualMemGranularity(Context, Device);

        ur_physical_mem_properties_t Desc{
            UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES, nullptr, 0};

        // Make sure [Ptr, Ptr + Size] is mapped to physical memory
        for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize);
             MappedPtr <= ShadowEnd; MappedPtr += PageSize) {
            // The lock is (re)acquired per page, so concurrent callers can
            // interleave between pages.
            std::scoped_lock<ur_mutex> Guard(VirtualMemMapsMutex);
            if (VirtualMemMaps.find(MappedPtr) == VirtualMemMaps.end()) {
                // First touch of this shadow page: back it with physical
                // memory and map it read/write.
                ur_physical_mem_handle_t PhysicalMem{};
                auto URes = getContext()->urDdiTable.PhysicalMem.pfnCreate(
                    Context, Device, PageSize, &Desc, &PhysicalMem);
                if (URes != UR_RESULT_SUCCESS) {
                    getContext()->logger.error("urPhysicalMemCreate(): {}",
                                               URes);
                    return URes;
                }

                URes = getContext()->urDdiTable.VirtualMem.pfnMap(
                    Context, (void *)MappedPtr, PageSize, PhysicalMem, 0,
                    UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE);
                if (URes != UR_RESULT_SUCCESS) {
                    getContext()->logger.error("urVirtualMemMap({}, {}): {}",
                                               (void *)MappedPtr, PageSize,
                                               URes);
                    // NOTE(review): PhysicalMem is not released on this error
                    // path — looks like a handle leak; confirm and add a
                    // pfnRelease here.
                    return URes;
                }

                getContext()->logger.debug("urVirtualMemMap: {} ~ {}",
                                           (void *)MappedPtr,
                                           (void *)(MappedPtr + PageSize - 1));

                // Initialize to zero
                URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0,
                                             PageSize);
                if (URes != UR_RESULT_SUCCESS) {
                    getContext()->logger.error("EnqueueUSMBlockingSet(): {}",
                                               URes);
                    return URes;
                }

                VirtualMemMaps[MappedPtr].first = PhysicalMem;
            }

            // We don't need to record virtual memory map for null pointer,
            // since it doesn't have an alloc info.
            if (Ptr == 0) {
                continue;
            }

            // Record which allocation owns shadow on this page so that
            // ReleaseShadow() can unmap the page once no allocation uses it.
            auto AllocInfoIt =
                getContext()->interceptor->findAllocInfoByAddress(Ptr);
            assert(AllocInfoIt);
            VirtualMemMaps[MappedPtr].second.insert((*AllocInfoIt)->second);
        }
    }

    // All touched pages are now mapped: write the poison value itself.
    auto URes = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value,
                                      ShadowEnd - ShadowBegin + 1);
    getContext()->logger.debug(
        "EnqueuePoisonShadow (addr={}, count={}, value={}): {}",
        (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, (void *)(size_t)Value,
        URes);
    if (URes != UR_RESULT_SUCCESS) {
        getContext()->logger.error("EnqueueUSMBlockingSet(): {}", URes);
        return URes;
    }

    return UR_RESULT_SUCCESS;
}
223+
224+
/// Called when an allocation is freed: remove it from the bookkeeping of
/// every shadow page it touched, and once a page no longer backs any live
/// allocation, unmap it and release its physical memory.
///
/// @param AI the allocation being released
ur_result_t ShadowMemoryGPU::ReleaseShadow(std::shared_ptr<AllocInfo> AI) {
    uptr ShadowBegin = MemToShadow(AI->AllocBegin);
    // NOTE(review): the end is computed from the one-past-end address (no
    // "- 1" as in EnqueuePoisonShadow), so the loop may also visit the page
    // just past the allocation's shadow — harmless, since only pages whose
    // allocation set becomes empty are unmapped. Confirm intent.
    uptr ShadowEnd = MemToShadow(AI->AllocBegin + AI->AllocSize);
    assert(ShadowBegin <= ShadowEnd);

    static const size_t PageSize = GetVirtualMemGranularity(Context, Device);

    for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize);
         MappedPtr <= ShadowEnd; MappedPtr += PageSize) {
        std::scoped_lock<ur_mutex> Guard(VirtualMemMapsMutex);
        // Single lookup instead of the original find + three operator[]s.
        auto It = VirtualMemMaps.find(MappedPtr);
        if (It == VirtualMemMaps.end()) {
            continue;
        }
        It->second.second.erase(AI);
        if (It->second.second.empty()) {
            UR_CALL(getContext()->urDdiTable.VirtualMem.pfnUnmap(
                Context, (void *)MappedPtr, PageSize));
            UR_CALL(getContext()->urDdiTable.PhysicalMem.pfnRelease(
                It->second.first));
            getContext()->logger.debug("urVirtualMemUnmap: {} ~ {}",
                                       (void *)MappedPtr,
                                       (void *)(MappedPtr + PageSize - 1));
            // BUGFIX: erase the stale entry. The original left the key in
            // VirtualMemMaps after unmapping, so a later EnqueuePoisonShadow
            // would see the page as "already mapped", skip re-mapping, and
            // write to an unmapped VA.
            VirtualMemMaps.erase(It);
        }
    }

    return UR_RESULT_SUCCESS;
}
251+
252+
/// Map an application address to its PVC shadow address. Device USM
/// (addresses with any of the top 8 bits set) lands in a region offset by
/// 0x80000000000; everything else is treated as a 47-bit host/shared VA.
uptr ShadowMemoryPVC::MemToShadow(uptr Ptr) {
    constexpr uptr DeviceUsmTagMask = 0xFF00000000000000ULL;
    if ((Ptr & DeviceUsmTagMask) == 0) {
        // Host/Shared USM — only consider 47bit VA.
        return ShadowBegin + ((Ptr & 0x7FFFFFFFFFFFULL) >> ASAN_SHADOW_SCALE);
    }
    // Device USM: keep the low 48 bits, shifted into the device region.
    return ShadowBegin + 0x80000000000ULL +
           ((Ptr & 0xFFFFFFFFFFFFULL) >> ASAN_SHADOW_SCALE);
}
260+
261+
/// Map an application address to its DG2 shadow address. Device USM
/// (addresses with any of the top 16 bits set) lands in a region offset by
/// 0x80000000000; host/shared USM maps directly.
uptr ShadowMemoryDG2::MemToShadow(uptr Ptr) {
    constexpr uptr DeviceUsmTagMask = 0xFFFF000000000000ULL;
    if ((Ptr & DeviceUsmTagMask) == 0) {
        // Host/Shared USM — direct mapping.
        return ShadowBegin + (Ptr >> ASAN_SHADOW_SCALE);
    }
    // Device USM: keep the low 47 bits, shifted into the device region.
    return ShadowBegin + 0x80000000000ULL +
           ((Ptr & 0x7FFFFFFFFFFFULL) >> ASAN_SHADOW_SCALE);
}
269+
270+
} // namespace ur_sanitizer_layer

0 commit comments

Comments
 (0)