Skip to content

Commit 2c2b222

Browse files
authored
Merge pull request #2394 from zhaomaosu/do-alloc-use-pool
[DevASAN] Do allocation with USM pool to reduce memory overhead
2 parents cc7e8a1 + 31ba4df commit 2c2b222

File tree

4 files changed

+64
-80
lines changed

4 files changed

+64
-80
lines changed

source/loader/layers/sanitizer/asan/asan_interceptor.cpp

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ AsanInterceptor::~AsanInterceptor() {
3636
// We must release these objects before releasing adapters, since
3737
// they may use the adapter in their destructor
3838
for (const auto &[_, DeviceInfo] : m_DeviceMap) {
39-
DeviceInfo->Shadow->Destory();
39+
[[maybe_unused]] auto URes = DeviceInfo->Shadow->Destory();
40+
assert(URes == UR_RESULT_SUCCESS);
4041
}
4142

4243
m_Quarantine = nullptr;
@@ -96,6 +97,10 @@ ur_result_t AsanInterceptor::allocateMemory(ur_context_handle_t Context,
9697

9798
void *Allocated = nullptr;
9899

100+
if (Pool == nullptr) {
101+
Pool = ContextInfo->getUSMPool();
102+
}
103+
99104
if (Type == AllocType::DEVICE_USM) {
100105
UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
101106
Context, Device, Properties, Pool, NeededSize, &Allocated));
@@ -228,16 +233,6 @@ ur_result_t AsanInterceptor::releaseMemory(ur_context_handle_t Context,
228233
ContextInfo->Stats.UpdateUSMRealFreed(
229234
ToFreeAllocInfo->AllocSize, ToFreeAllocInfo->getRedzoneSize());
230235

231-
if (ToFreeAllocInfo->Type == AllocType::HOST_USM) {
232-
for (auto &Device : ContextInfo->DeviceList) {
233-
UR_CALL(getDeviceInfo(Device)->Shadow->ReleaseShadow(
234-
ToFreeAllocInfo));
235-
}
236-
} else {
237-
UR_CALL(getDeviceInfo(ToFreeAllocInfo->Device)
238-
->Shadow->ReleaseShadow(ToFreeAllocInfo));
239-
}
240-
241236
UR_CALL(getContext()->urDdiTable.USM.pfnFree(
242237
Context, (void *)(ToFreeAllocInfo->AllocBegin)));
243238

@@ -436,12 +431,6 @@ ur_result_t AsanInterceptor::unregisterProgram(ur_program_handle_t Program) {
436431
auto ProgramInfo = getProgramInfo(Program);
437432
assert(ProgramInfo != nullptr && "unregistered program!");
438433

439-
for (auto AI : ProgramInfo->AllocInfoForGlobals) {
440-
UR_CALL(getDeviceInfo(AI->Device)->Shadow->ReleaseShadow(AI));
441-
m_AllocationMap.erase(AI->AllocBegin);
442-
}
443-
ProgramInfo->AllocInfoForGlobals.clear();
444-
445434
ProgramInfo->InstrumentedKernels.clear();
446435

447436
return UR_RESULT_SUCCESS;
@@ -560,10 +549,6 @@ AsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
560549
{}});
561550

562551
ContextInfo->insertAllocInfo({Device}, AI);
563-
ProgramInfo->AllocInfoForGlobals.emplace(AI);
564-
565-
std::scoped_lock<ur_shared_mutex> Guard(m_AllocationMapMutex);
566-
m_AllocationMap.emplace(AI->AllocBegin, std::move(AI));
567552
}
568553
}
569554

@@ -887,9 +872,14 @@ bool ProgramInfo::isKernelInstrumented(ur_kernel_handle_t Kernel) const {
887872
ContextInfo::~ContextInfo() {
888873
Stats.Print(Handle);
889874

890-
[[maybe_unused]] auto Result =
891-
getContext()->urDdiTable.Context.pfnRelease(Handle);
892-
assert(Result == UR_RESULT_SUCCESS);
875+
[[maybe_unused]] ur_result_t URes;
876+
if (USMPool) {
877+
URes = getContext()->urDdiTable.USM.pfnPoolRelease(USMPool);
878+
assert(URes == UR_RESULT_SUCCESS);
879+
}
880+
881+
URes = getContext()->urDdiTable.Context.pfnRelease(Handle);
882+
assert(URes == UR_RESULT_SUCCESS);
893883

894884
// check memory leaks
895885
if (getAsanInterceptor()->getOptions().DetectLeaks &&
@@ -905,6 +895,22 @@ ContextInfo::~ContextInfo() {
905895
}
906896
}
907897

898+
/// Returns the USM pool handle stored in this ContextInfo, creating the pool
/// on the first call.
///
/// Pool creation is attempted through std::call_once on the PoolInit flag, so
/// the creation lambda runs a single time for this ContextInfo even if this
/// method is called repeatedly.
///
/// @return The value of the USMPool member after the creation attempt. If
///         creation did not succeed this is whatever USMPool holds at that
///         point (NOTE(review): presumably still a null handle, since the
///         member is value-initialized in the struct - confirm that
///         pfnPoolCreate leaves the out-parameter untouched on failure).
ur_usm_pool_handle_t ContextInfo::getUSMPool() {
899+
// One-time initialization guarded by the PoolInit once_flag.
std::call_once(PoolInit, [this]() {
900+
// Pool descriptor: structure type tag, no extension chain (nullptr), and
// the last field set to 0.
ur_usm_pool_desc_t Desc{UR_STRUCTURE_TYPE_USM_POOL_DESC, nullptr, 0};
901+
auto URes =
902+
getContext()->urDdiTable.USM.pfnPoolCreate(Handle, &Desc, &USMPool);
903+
// Failure is not propagated to the caller: any result other than success
// or UR_RESULT_ERROR_UNSUPPORTED_FEATURE is only logged as a warning, and
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE is ignored silently.
if (URes != UR_RESULT_SUCCESS &&
904+
URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
905+
getContext()->logger.warning(
906+
"Failed to create USM pool, the memory overhead "
907+
"may increase: {}",
908+
URes);
909+
}
910+
});
911+
return USMPool;
912+
}
913+
908914
AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper() {
909915
[[maybe_unused]] ur_result_t Result;
910916
if (Host.LocalArgs) {

source/loader/layers/sanitizer/asan/asan_interceptor.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,6 @@ struct ProgramInfo {
112112
std::atomic<int32_t> RefCount = 1;
113113

114114
// Program is built only once, so we don't need to lock it
115-
std::unordered_set<std::shared_ptr<AllocInfo>> AllocInfoForGlobals;
116115
std::unordered_set<std::string> InstrumentedKernels;
117116

118117
explicit ProgramInfo(ur_program_handle_t Program) : Handle(Program) {
@@ -132,6 +131,10 @@ struct ProgramInfo {
132131

133132
struct ContextInfo {
134133
ur_context_handle_t Handle;
134+
135+
ur_usm_pool_handle_t USMPool{};
136+
std::once_flag PoolInit;
137+
135138
std::atomic<int32_t> RefCount = 1;
136139

137140
std::vector<ur_device_handle_t> DeviceList;
@@ -155,6 +158,8 @@ struct ContextInfo {
155158
AllocInfos.List.emplace_back(AI);
156159
}
157160
}
161+
162+
ur_usm_pool_handle_t getUSMPool();
158163
};
159164

160165
struct AsanRuntimeDataWrapper {

source/loader/layers/sanitizer/asan/asan_shadow.cpp

Lines changed: 27 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,15 @@ ur_result_t ShadowMemoryGPU::Setup() {
108108
// TODO: Protect Bad Zone
109109
auto Result = getContext()->urDdiTable.VirtualMem.pfnReserve(
110110
Context, nullptr, ShadowSize, (void **)&ShadowBegin);
111-
if (Result == UR_RESULT_SUCCESS) {
112-
ShadowEnd = ShadowBegin + ShadowSize;
113-
// Retain the context which reserves shadow memory
114-
getContext()->urDdiTable.Context.pfnRetain(Context);
111+
if (Result != UR_RESULT_SUCCESS) {
112+
getContext()->logger.error(
113+
"Shadow memory reserved failed with size {}: {}",
114+
(void *)ShadowSize, Result);
115+
return Result;
115116
}
117+
ShadowEnd = ShadowBegin + ShadowSize;
118+
// Retain the context which reserves shadow memory
119+
getContext()->urDdiTable.Context.pfnRetain(Context);
116120

117121
// Set shadow memory for null pointer
118122
// For GPU, we use up to 1 page of shadow memory
@@ -137,6 +141,24 @@ ur_result_t ShadowMemoryGPU::Destory() {
137141
Context, (void *)PrivateShadowOffset));
138142
PrivateShadowOffset = 0;
139143
}
144+
145+
static ur_result_t Result = [this]() {
146+
const size_t PageSize = GetVirtualMemGranularity(Context, Device);
147+
for (auto [MappedPtr, PhysicalMem] : VirtualMemMaps) {
148+
UR_CALL(getContext()->urDdiTable.VirtualMem.pfnUnmap(
149+
Context, (void *)MappedPtr, PageSize));
150+
UR_CALL(
151+
getContext()->urDdiTable.PhysicalMem.pfnRelease(PhysicalMem));
152+
}
153+
UR_CALL(getContext()->urDdiTable.VirtualMem.pfnFree(
154+
Context, (const void *)ShadowBegin, GetShadowSize()));
155+
UR_CALL(getContext()->urDdiTable.Context.pfnRelease(Context));
156+
return UR_RESULT_SUCCESS;
157+
}();
158+
if (!Result) {
159+
return Result;
160+
}
161+
140162
if (LocalShadowOffset != 0) {
141163
UR_CALL(getContext()->urDdiTable.USM.pfnFree(
142164
Context, (void *)LocalShadowOffset));
@@ -205,19 +227,8 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue,
205227
return URes;
206228
}
207229

208-
VirtualMemMaps[MappedPtr].first = PhysicalMem;
230+
VirtualMemMaps[MappedPtr] = PhysicalMem;
209231
}
210-
211-
// We don't need to record virtual memory map for null pointer,
212-
// since it doesn't have an alloc info.
213-
if (Ptr == 0) {
214-
continue;
215-
}
216-
217-
auto AllocInfoIt =
218-
getAsanInterceptor()->findAllocInfoByAddress(Ptr);
219-
assert(AllocInfoIt);
220-
VirtualMemMaps[MappedPtr].second.insert((*AllocInfoIt)->second);
221232
}
222233
}
223234

@@ -235,35 +246,6 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue,
235246
return UR_RESULT_SUCCESS;
236247
}
237248

238-
ur_result_t ShadowMemoryGPU::ReleaseShadow(std::shared_ptr<AllocInfo> AI) {
239-
uptr ShadowBegin = MemToShadow(AI->AllocBegin);
240-
uptr ShadowEnd = MemToShadow(AI->AllocBegin + AI->AllocSize);
241-
assert(ShadowBegin <= ShadowEnd);
242-
243-
static const size_t PageSize = GetVirtualMemGranularity(Context, Device);
244-
245-
for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize);
246-
MappedPtr <= ShadowEnd; MappedPtr += PageSize) {
247-
std::scoped_lock<ur_mutex> Guard(VirtualMemMapsMutex);
248-
if (VirtualMemMaps.find(MappedPtr) == VirtualMemMaps.end()) {
249-
continue;
250-
}
251-
VirtualMemMaps[MappedPtr].second.erase(AI);
252-
if (VirtualMemMaps[MappedPtr].second.empty()) {
253-
UR_CALL(getContext()->urDdiTable.VirtualMem.pfnUnmap(
254-
Context, (void *)MappedPtr, PageSize));
255-
UR_CALL(getContext()->urDdiTable.PhysicalMem.pfnRelease(
256-
VirtualMemMaps[MappedPtr].first));
257-
getContext()->logger.debug("urVirtualMemUnmap: {} ~ {}",
258-
(void *)MappedPtr,
259-
(void *)(MappedPtr + PageSize - 1));
260-
VirtualMemMaps.erase(MappedPtr);
261-
}
262-
}
263-
264-
return UR_RESULT_SUCCESS;
265-
}
266-
267249
ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue,
268250
uint32_t NumWG, uptr &Begin,
269251
uptr &End) {

source/loader/layers/sanitizer/asan/asan_shadow.hpp

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,6 @@ struct ShadowMemory {
3535
virtual ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr,
3636
uptr Size, u8 Value) = 0;
3737

38-
virtual ur_result_t ReleaseShadow(std::shared_ptr<AllocInfo>) {
39-
return UR_RESULT_SUCCESS;
40-
}
41-
4238
virtual size_t GetShadowSize() = 0;
4339

4440
virtual ur_result_t AllocLocalShadow(ur_queue_handle_t Queue,
@@ -98,8 +94,6 @@ struct ShadowMemoryGPU : public ShadowMemory {
9894
ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr,
9995
uptr Size, u8 Value) override final;
10096

101-
ur_result_t ReleaseShadow(std::shared_ptr<AllocInfo> AI) override final;
102-
10397
ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG,
10498
uptr &Begin, uptr &End) override final;
10599

@@ -108,10 +102,7 @@ struct ShadowMemoryGPU : public ShadowMemory {
108102

109103
ur_mutex VirtualMemMapsMutex;
110104

111-
std::unordered_map<
112-
uptr, std::pair<ur_physical_mem_handle_t,
113-
std::unordered_set<std::shared_ptr<AllocInfo>>>>
114-
VirtualMemMaps;
105+
std::unordered_map<uptr, ur_physical_mem_handle_t> VirtualMemMaps;
115106

116107
uptr LocalShadowOffset = 0;
117108

0 commit comments

Comments
 (0)