Skip to content

[DeviceASAN] Fix multiple contexts in memory overhead statistics #17897

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ int main() {
});
});
Q.wait();
// CHECK-STATS: Stats
// CHECK-NOT: Stats
// CHECK-STATS: Stats: Context
// CHECK-NOT: Stats: Context

sycl::free(array1, Q);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ AsanInterceptor::~AsanInterceptor() {

for (auto &[_, ShadowMemory] : m_ShadowMap) {
ShadowMemory->Destory();
getContext()->urDdiTable.Context.pfnRelease(ShadowMemory->Context);
}

for (auto Adapter : m_Adapters) {
Expand Down Expand Up @@ -323,16 +322,7 @@ AsanInterceptor::getOrCreateShadowMemory(ur_device_handle_t Device,
DeviceType Type) {
std::scoped_lock<ur_shared_mutex> Guard(m_ShadowMapMutex);
if (m_ShadowMap.find(Type) == m_ShadowMap.end()) {
ur_context_handle_t InternalContext;
auto Res = getContext()->urDdiTable.Context.pfnCreate(1, &Device, nullptr,
&InternalContext);
if (Res != UR_RESULT_SUCCESS) {
getContext()->logger.error("Failed to create shadow context");
return nullptr;
}
std::shared_ptr<ContextInfo> CI;
insertContext(InternalContext, CI);
m_ShadowMap[Type] = CreateShadowMemory(InternalContext, Device, Type);
m_ShadowMap[Type] = CreateShadowMemory(Device, Type);
m_ShadowMap[Type]->Setup();
}
return m_ShadowMap[Type];
Expand Down
153 changes: 70 additions & 83 deletions unified-runtime/source/loader/layers/sanitizer/asan/asan_shadow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,44 +20,39 @@
namespace ur_sanitizer_layer {
namespace asan {

std::shared_ptr<ShadowMemory> CreateShadowMemory(ur_context_handle_t Context,
ur_device_handle_t Device,
std::shared_ptr<ShadowMemory> CreateShadowMemory(ur_device_handle_t Device,
DeviceType Type) {
if (Type == DeviceType::CPU) {
return std::make_shared<ShadowMemoryCPU>(Context, Device);
} else if (Type == DeviceType::GPU_PVC) {
return std::make_shared<ShadowMemoryPVC>(Context, Device);
} else if (Type == DeviceType::GPU_DG2) {
return std::make_shared<ShadowMemoryDG2>(Context, Device);
} else {
getContext()->logger.error("Unsupport device type");
return nullptr;
switch (Type) {
case DeviceType::CPU:
return std::make_shared<ShadowMemoryCPU>(Device);
case DeviceType::GPU_PVC:
return std::make_shared<ShadowMemoryPVC>(Device);
case DeviceType::GPU_DG2:
return std::make_shared<ShadowMemoryDG2>(Device);
default:
die("CreateShadowMemory: Unsupport device type");
}
}

ur_result_t ShadowMemoryCPU::Setup() {
static ur_result_t Result = [this]() {
size_t ShadowSize = GetShadowSize();
ShadowBegin = MmapNoReserve(0, ShadowSize);
if (ShadowBegin == 0) {
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
DontCoredumpRange(ShadowBegin, ShadowSize);
ShadowEnd = ShadowBegin + ShadowSize;

// Set shadow memory for null pointer
// For CPU, we use a typical page size of 4K bytes.
constexpr size_t NullptrRedzoneSize = 4096;
auto URes = EnqueuePoisonShadow({}, 0, NullptrRedzoneSize,
kNullPointerRedzoneMagic);
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.error("EnqueuePoisonShadow(NullPointerRZ): {}",
URes);
return URes;
}
size_t ShadowSize = GetShadowSize();
ShadowBegin = MmapNoReserve(0, ShadowSize);
if (ShadowBegin == 0) {
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
DontCoredumpRange(ShadowBegin, ShadowSize);
ShadowEnd = ShadowBegin + ShadowSize;

// Set shadow memory for null pointer
// For CPU, we use a typical page size of 4K bytes.
constexpr size_t NullptrRedzoneSize = 4096;
auto URes =
EnqueuePoisonShadow({}, 0, NullptrRedzoneSize, kNullPointerRedzoneMagic);
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.error("EnqueuePoisonShadow(NullPointerRZ): {}", URes);
return URes;
}();
return Result;
}
return URes;
}

ur_result_t ShadowMemoryCPU::Destory() {
Expand Down Expand Up @@ -99,39 +94,33 @@ ur_result_t ShadowMemoryGPU::Setup() {
// we reserve shadow memory for each context, this will cause out-of-resource
// error when user uses multiple contexts. Therefore, we just create one
// shadow memory here.
static ur_result_t Result = [this]() {
const size_t ShadowSize = GetShadowSize();
// To reserve very large amount of GPU virtual memroy, the pStart param
// should be beyond the SVM range, so that GFX driver will automatically
// switch to reservation on the GPU heap.
const void *StartAddress = (void *)(0x100'0000'0000'0000ULL);
// TODO: Protect Bad Zone
auto Result = getContext()->urDdiTable.VirtualMem.pfnReserve(
Context, StartAddress, ShadowSize, (void **)&ShadowBegin);
if (Result != UR_RESULT_SUCCESS) {
getContext()->logger.error(
"Shadow memory reserved failed with size {}: {}", (void *)ShadowSize,
Result);
return Result;
}
ShadowEnd = ShadowBegin + ShadowSize;
// Retain the context which reserves shadow memory
getContext()->urDdiTable.Context.pfnRetain(Context);

// Set shadow memory for null pointer
// For GPU, we use up to 1 page of shadow memory
const size_t NullptrRedzoneSize = GetVirtualMemGranularity(Context, Device)
<< ASAN_SHADOW_SCALE;
ManagedQueue Queue(Context, Device);
Result = EnqueuePoisonShadow(Queue, 0, NullptrRedzoneSize,
kNullPointerRedzoneMagic);
if (Result != UR_RESULT_SUCCESS) {
getContext()->logger.error("EnqueuePoisonShadow(NullPointerRZ): {}",
Result);
return Result;
}
const size_t ShadowSize = GetShadowSize();
// To reserve very large amount of GPU virtual memory, the pStart param
// should be beyond the SVM range, so that GFX driver will automatically
// switch to reservation on the GPU heap.
const void *StartAddress = (void *)(0x100'0000'0000'0000ULL);
// TODO: Protect Bad Zone
auto Result = getContext()->urDdiTable.VirtualMem.pfnReserve(
Context, StartAddress, ShadowSize, (void **)&ShadowBegin);
if (Result != UR_RESULT_SUCCESS) {
getContext()->logger.error("Shadow memory reserved failed with size {}: {}",
(void *)ShadowSize, Result);
return Result;
}();
}
ShadowEnd = ShadowBegin + ShadowSize;

// Set shadow memory for null pointer
// For GPU, we use up to 1 page of shadow memory
const size_t NullptrRedzoneSize = GetVirtualMemGranularity(Context, Device)
<< ASAN_SHADOW_SCALE;
ManagedQueue Queue(Context, Device);
Result = EnqueuePoisonShadow(Queue, 0, NullptrRedzoneSize,
kNullPointerRedzoneMagic);
if (Result != UR_RESULT_SUCCESS) {
getContext()->logger.error("EnqueuePoisonShadow(NullPointerRZ): {}",
Result);
return Result;
}
return Result;
}

Expand All @@ -142,7 +131,13 @@ ur_result_t ShadowMemoryGPU::Destory() {
PrivateShadowOffset = 0;
}

static ur_result_t Result = [this]() {
if (LocalShadowOffset != 0) {
UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context,
(void *)LocalShadowOffset));
LocalShadowOffset = 0;
}

{
const size_t PageSize = GetVirtualMemGranularity(Context, Device);
for (auto [MappedPtr, PhysicalMem] : VirtualMemMaps) {
UR_CALL(getContext()->urDdiTable.VirtualMem.pfnUnmap(
Expand All @@ -151,24 +146,14 @@ ur_result_t ShadowMemoryGPU::Destory() {
}
UR_CALL(getContext()->urDdiTable.VirtualMem.pfnFree(
Context, (const void *)ShadowBegin, GetShadowSize()));
UR_CALL(getContext()->urDdiTable.Context.pfnRelease(Context));
return UR_RESULT_SUCCESS;
}();
if (!Result) {
return Result;
}

if (LocalShadowOffset != 0) {
UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context,
(void *)LocalShadowOffset));
LocalShadowOffset = 0;
}
if (ShadowBegin != 0) {
UR_CALL(getContext()->urDdiTable.VirtualMem.pfnFree(
Context, (const void *)ShadowBegin, GetShadowSize()));
UR_CALL(getContext()->urDdiTable.Context.pfnRelease(Context));
ShadowBegin = ShadowEnd = 0;
if (ShadowBegin != 0) {
UR_CALL(getContext()->urDdiTable.VirtualMem.pfnFree(
Context, (const void *)ShadowBegin, GetShadowSize()));
ShadowBegin = ShadowEnd = 0;
}
}

return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -248,7 +233,8 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue,
(NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
static size_t LastAllocedSize = 0;
if (RequiredShadowSize > LastAllocedSize) {
auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
ur_context_handle_t QueueContext = GetContext(Queue);
auto ContextInfo = getAsanInterceptor()->getContextInfo(QueueContext);
if (LocalShadowOffset) {
UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context,
(void *)LocalShadowOffset));
Expand Down Expand Up @@ -288,7 +274,8 @@ ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue,
(NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
static size_t LastAllocedSize = 0;
if (RequiredShadowSize > LastAllocedSize) {
auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
ur_context_handle_t QueueContext = GetContext(Queue);
auto ContextInfo = getAsanInterceptor()->getContextInfo(QueueContext);
if (PrivateShadowOffset) {
UR_CALL(getContext()->urDdiTable.USM.pfnFree(
Context, (void *)PrivateShadowOffset));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,31 @@

#pragma once

#include "asan_allocator.hpp"
#include "sanitizer_common/sanitizer_common.hpp"
#include "sanitizer_common/sanitizer_libdevice.hpp"
#include "ur_sanitizer_layer.hpp"

#include <unordered_set>

namespace ur_sanitizer_layer {
namespace asan {

struct ShadowMemory {
ShadowMemory(ur_context_handle_t Context, ur_device_handle_t Device)
: Context(Context), Device(Device) {
ShadowMemory(ur_device_handle_t Device) : Device(Device) {
[[maybe_unused]] ur_result_t URes =
getContext()->urDdiTable.Device.pfnRetain(Device);
assert(URes == UR_RESULT_SUCCESS);

// Create the internal context used for managing the shadow memory
URes = getContext()->urDdiTable.Context.pfnCreate(1, &Device, nullptr,
&Context);
assert(URes == UR_RESULT_SUCCESS);
}

virtual ~ShadowMemory() {
[[maybe_unused]] ur_result_t URes =
getContext()->urDdiTable.Device.pfnRelease(Device);
getContext()->urDdiTable.Context.pfnRelease(Context);
assert(URes == UR_RESULT_SUCCESS);

URes = getContext()->urDdiTable.Device.pfnRelease(Device);
assert(URes == UR_RESULT_SUCCESS);
}

Expand Down Expand Up @@ -64,8 +69,7 @@ struct ShadowMemory {
};

struct ShadowMemoryCPU final : public ShadowMemory {
ShadowMemoryCPU(ur_context_handle_t Context, ur_device_handle_t Device)
: ShadowMemory(Context, Device) {}
ShadowMemoryCPU(ur_device_handle_t Device) : ShadowMemory(Device) {}

ur_result_t Setup() override;

Expand Down Expand Up @@ -94,8 +98,7 @@ struct ShadowMemoryCPU final : public ShadowMemory {
};

struct ShadowMemoryGPU : public ShadowMemory {
ShadowMemoryGPU(ur_context_handle_t Context, ur_device_handle_t Device)
: ShadowMemory(Context, Device) {}
ShadowMemoryGPU(ur_device_handle_t Device) : ShadowMemory(Device) {}

ur_result_t Setup() override;

Expand Down Expand Up @@ -136,8 +139,7 @@ struct ShadowMemoryGPU : public ShadowMemory {
/// Device USM : 0x0800_0000_0000 ~ 0x17ff_ffff_ffff
///
struct ShadowMemoryPVC final : public ShadowMemoryGPU {
ShadowMemoryPVC(ur_context_handle_t Context, ur_device_handle_t Device)
: ShadowMemoryGPU(Context, Device) {}
ShadowMemoryPVC(ur_device_handle_t Device) : ShadowMemoryGPU(Device) {}

uptr MemToShadow(uptr Ptr) override;

Expand All @@ -155,16 +157,14 @@ struct ShadowMemoryPVC final : public ShadowMemoryGPU {
/// Device USM : 0x0800_0000_0000 ~ 0x0fff_ffff_ffff
///
struct ShadowMemoryDG2 final : public ShadowMemoryGPU {
ShadowMemoryDG2(ur_context_handle_t Context, ur_device_handle_t Device)
: ShadowMemoryGPU(Context, Device) {}
ShadowMemoryDG2(ur_device_handle_t Device) : ShadowMemoryGPU(Device) {}

uptr MemToShadow(uptr Ptr) override;

size_t GetShadowSize() override { return 0x100000000000ULL; }
};

std::shared_ptr<ShadowMemory> CreateShadowMemory(ur_context_handle_t Context,
ur_device_handle_t Device,
std::shared_ptr<ShadowMemory> CreateShadowMemory(ur_device_handle_t Device,
DeviceType Type);

} // namespace asan
Expand Down